1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
8
9 This file is part of GCC.
10
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
15
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "flags.h"
39 #include "reload.h"
40 #include "function.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "diagnostic-core.h"
44 #include "recog.h"
45 #include "cgraph.h"
46 #include "ggc.h"
47 #include "except.h"
48 #include "tm_p.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "debug.h"
52 #include "langhooks.h"
53 #include "df.h"
54 #include "intl.h"
55 #include "libfuncs.h"
56 #include "params.h"
57 #include "opts.h"
58 #include "dumpfile.h"
59
60 /* Forward definitions of types. */
61 typedef struct minipool_node Mnode;
62 typedef struct minipool_fixup Mfix;
63
64 void (*arm_lang_output_object_attributes_hook)(void);
65
66 struct four_ints
67 {
68 int i[4];
69 };
70
71 /* Forward function declarations. */
72 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
73 static int arm_compute_static_chain_stack_bytes (void);
74 static arm_stack_offsets *arm_get_frame_offsets (void);
75 static void arm_add_gc_roots (void);
76 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
77 HOST_WIDE_INT, rtx, rtx, int, int);
78 static unsigned bit_count (unsigned long);
79 static int arm_address_register_rtx_p (rtx, int);
80 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
81 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
82 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
83 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
84 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
85 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
86 inline static int thumb1_index_register_rtx_p (rtx, int);
87 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
88 static int thumb_far_jump_used_p (void);
89 static bool thumb_force_lr_save (void);
90 static unsigned arm_size_return_regs (void);
91 static bool arm_assemble_integer (rtx, unsigned int, int);
92 static void arm_print_operand (FILE *, rtx, int);
93 static void arm_print_operand_address (FILE *, rtx);
94 static bool arm_print_operand_punct_valid_p (unsigned char code);
95 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
96 static arm_cc get_arm_condition_code (rtx);
97 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
98 static rtx is_jump_table (rtx);
99 static const char *output_multi_immediate (rtx *, const char *, const char *,
100 int, HOST_WIDE_INT);
101 static const char *shift_op (rtx, HOST_WIDE_INT *);
102 static struct machine_function *arm_init_machine_status (void);
103 static void thumb_exit (FILE *, int);
104 static rtx is_jump_table (rtx);
105 static HOST_WIDE_INT get_jump_table_size (rtx);
106 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
107 static Mnode *add_minipool_forward_ref (Mfix *);
108 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
109 static Mnode *add_minipool_backward_ref (Mfix *);
110 static void assign_minipool_offsets (Mfix *);
111 static void arm_print_value (FILE *, rtx);
112 static void dump_minipool (rtx);
113 static int arm_barrier_cost (rtx);
114 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
115 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
116 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
117 rtx);
118 static void arm_reorg (void);
119 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
120 static unsigned long arm_compute_save_reg0_reg12_mask (void);
121 static unsigned long arm_compute_save_reg_mask (void);
122 static unsigned long arm_isr_value (tree);
123 static unsigned long arm_compute_func_type (void);
124 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
125 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
126 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
127 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
128 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
129 #endif
130 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
131 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
132 static int arm_comp_type_attributes (const_tree, const_tree);
133 static void arm_set_default_type_attributes (tree);
134 static int arm_adjust_cost (rtx, rtx, rtx, int);
135 static int optimal_immediate_sequence (enum rtx_code code,
136 unsigned HOST_WIDE_INT val,
137 struct four_ints *return_sequence);
138 static int optimal_immediate_sequence_1 (enum rtx_code code,
139 unsigned HOST_WIDE_INT val,
140 struct four_ints *return_sequence,
141 int i);
142 static int arm_get_strip_length (int);
143 static bool arm_function_ok_for_sibcall (tree, tree);
144 static enum machine_mode arm_promote_function_mode (const_tree,
145 enum machine_mode, int *,
146 const_tree, int);
147 static bool arm_return_in_memory (const_tree, const_tree);
148 static rtx arm_function_value (const_tree, const_tree, bool);
149 static rtx arm_libcall_value_1 (enum machine_mode);
150 static rtx arm_libcall_value (enum machine_mode, const_rtx);
151 static bool arm_function_value_regno_p (const unsigned int);
152 static void arm_internal_label (FILE *, const char *, unsigned long);
153 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
154 tree);
155 static bool arm_have_conditional_execution (void);
156 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
157 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
158 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
159 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
160 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
161 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
162 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
163 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
164 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
165 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
166 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
167 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
168 static void arm_init_builtins (void);
169 static void arm_init_iwmmxt_builtins (void);
170 static rtx safe_vector_operand (rtx, enum machine_mode);
171 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
172 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
173 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
174 static tree arm_builtin_decl (unsigned, bool);
175 static void emit_constant_insn (rtx cond, rtx pattern);
176 static rtx emit_set_insn (rtx, rtx);
177 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
178 tree, bool);
179 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
180 const_tree, bool);
181 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
182 const_tree, bool);
183 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
184 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
185 const_tree);
186 static rtx aapcs_libcall_value (enum machine_mode);
187 static int aapcs_select_return_coproc (const_tree, const_tree);
188
189 #ifdef OBJECT_FORMAT_ELF
190 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
191 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
192 #endif
193 #ifndef ARM_PE
194 static void arm_encode_section_info (tree, rtx, int);
195 #endif
196
197 static void arm_file_end (void);
198 static void arm_file_start (void);
199
200 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
201 tree, int *, int);
202 static bool arm_pass_by_reference (cumulative_args_t,
203 enum machine_mode, const_tree, bool);
204 static bool arm_promote_prototypes (const_tree);
205 static bool arm_default_short_enums (void);
206 static bool arm_align_anon_bitfield (void);
207 static bool arm_return_in_msb (const_tree);
208 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
209 static bool arm_return_in_memory (const_tree, const_tree);
210 #if ARM_UNWIND_INFO
211 static void arm_unwind_emit (FILE *, rtx);
212 static bool arm_output_ttype (rtx);
213 static void arm_asm_emit_except_personality (rtx);
214 static void arm_asm_init_sections (void);
215 #endif
216 static rtx arm_dwarf_register_span (rtx);
217
218 static tree arm_cxx_guard_type (void);
219 static bool arm_cxx_guard_mask_bit (void);
220 static tree arm_get_cookie_size (tree);
221 static bool arm_cookie_has_size (void);
222 static bool arm_cxx_cdtor_returns_this (void);
223 static bool arm_cxx_key_method_may_be_inline (void);
224 static void arm_cxx_determine_class_data_visibility (tree);
225 static bool arm_cxx_class_data_always_comdat (void);
226 static bool arm_cxx_use_aeabi_atexit (void);
227 static void arm_init_libfuncs (void);
228 static tree arm_build_builtin_va_list (void);
229 static void arm_expand_builtin_va_start (tree, rtx);
230 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
231 static void arm_option_override (void);
232 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
233 static bool arm_cannot_copy_insn_p (rtx);
234 static bool arm_tls_symbol_p (rtx x);
235 static int arm_issue_rate (void);
236 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
237 static bool arm_output_addr_const_extra (FILE *, rtx);
238 static bool arm_allocate_stack_slots_for_args (void);
239 static bool arm_warn_func_return (tree);
240 static const char *arm_invalid_parameter_type (const_tree t);
241 static const char *arm_invalid_return_type (const_tree t);
242 static tree arm_promoted_type (const_tree t);
243 static tree arm_convert_to_type (tree type, tree expr);
244 static bool arm_scalar_mode_supported_p (enum machine_mode);
245 static bool arm_frame_pointer_required (void);
246 static bool arm_can_eliminate (const int, const int);
247 static void arm_asm_trampoline_template (FILE *);
248 static void arm_trampoline_init (rtx, tree, rtx);
249 static rtx arm_trampoline_adjust_address (rtx);
250 static rtx arm_pic_static_addr (rtx orig, rtx reg);
251 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
252 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
253 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
254 static bool arm_array_mode_supported_p (enum machine_mode,
255 unsigned HOST_WIDE_INT);
256 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
257 static bool arm_class_likely_spilled_p (reg_class_t);
258 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
259 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
260 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
261 const_tree type,
262 int misalignment,
263 bool is_packed);
264 static void arm_conditional_register_usage (void);
265 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
266 static unsigned int arm_autovectorize_vector_sizes (void);
267 static int arm_default_branch_cost (bool, bool);
268 static int arm_cortex_a5_branch_cost (bool, bool);
269
270 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
271 const unsigned char *sel);
272
273 \f
274 /* Table of machine attributes. */
275 static const struct attribute_spec arm_attribute_table[] =
276 {
277 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
278 affects_type_identity } */
279 /* Function calls made to this symbol must be done indirectly, because
280 it may lie outside of the 26 bit addressing range of a normal function
281 call. */
282 { "long_call", 0, 0, false, true, true, NULL, false },
283 /* Whereas these functions are always known to reside within the 26 bit
284 addressing range. */
285 { "short_call", 0, 0, false, true, true, NULL, false },
286 /* Specify the procedure call conventions for a function. */
287 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
288 false },
289 /* Interrupt Service Routines have special prologue and epilogue requirements. */
290 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
291 false },
292 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
293 false },
294 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
295 false },
296 #ifdef ARM_PE
297 /* ARM/PE has three new attributes:
298 interfacearm - ?
299 dllexport - for exporting a function/variable that will live in a dll
300 dllimport - for importing a function/variable from a dll
301
302 Microsoft allows multiple declspecs in one __declspec, separating
303 them with spaces. We do NOT support this. Instead, use __declspec
304 multiple times.
305 */
306 { "dllimport", 0, 0, true, false, false, NULL, false },
307 { "dllexport", 0, 0, true, false, false, NULL, false },
308 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
309 false },
310 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
311 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
312 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
313 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
314 false },
315 #endif
316 { NULL, 0, 0, false, false, false, NULL, false }
317 };
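/* Illustrative uses of the attributes above (examples only, not taken
   from this file):

     void fiq_handler (void) __attribute__ ((isr ("FIQ")));
     extern int far_helper (int) __attribute__ ((long_call));  */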
318 \f
319 /* Initialize the GCC target structure. */
320 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
321 #undef TARGET_MERGE_DECL_ATTRIBUTES
322 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
323 #endif
324
325 #undef TARGET_LEGITIMIZE_ADDRESS
326 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
327
328 #undef TARGET_ATTRIBUTE_TABLE
329 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
330
331 #undef TARGET_ASM_FILE_START
332 #define TARGET_ASM_FILE_START arm_file_start
333 #undef TARGET_ASM_FILE_END
334 #define TARGET_ASM_FILE_END arm_file_end
335
336 #undef TARGET_ASM_ALIGNED_SI_OP
337 #define TARGET_ASM_ALIGNED_SI_OP NULL
338 #undef TARGET_ASM_INTEGER
339 #define TARGET_ASM_INTEGER arm_assemble_integer
340
341 #undef TARGET_PRINT_OPERAND
342 #define TARGET_PRINT_OPERAND arm_print_operand
343 #undef TARGET_PRINT_OPERAND_ADDRESS
344 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
345 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
346 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
347
348 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
349 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
350
351 #undef TARGET_ASM_FUNCTION_PROLOGUE
352 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
353
354 #undef TARGET_ASM_FUNCTION_EPILOGUE
355 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
356
357 #undef TARGET_OPTION_OVERRIDE
358 #define TARGET_OPTION_OVERRIDE arm_option_override
359
360 #undef TARGET_COMP_TYPE_ATTRIBUTES
361 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
362
363 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
364 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
365
366 #undef TARGET_SCHED_ADJUST_COST
367 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
368
369 #undef TARGET_REGISTER_MOVE_COST
370 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
371
372 #undef TARGET_MEMORY_MOVE_COST
373 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
374
375 #undef TARGET_ENCODE_SECTION_INFO
376 #ifdef ARM_PE
377 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
378 #else
379 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
380 #endif
381
382 #undef TARGET_STRIP_NAME_ENCODING
383 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
384
385 #undef TARGET_ASM_INTERNAL_LABEL
386 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
387
388 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
389 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
390
391 #undef TARGET_FUNCTION_VALUE
392 #define TARGET_FUNCTION_VALUE arm_function_value
393
394 #undef TARGET_LIBCALL_VALUE
395 #define TARGET_LIBCALL_VALUE arm_libcall_value
396
397 #undef TARGET_FUNCTION_VALUE_REGNO_P
398 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
399
400 #undef TARGET_ASM_OUTPUT_MI_THUNK
401 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
402 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
403 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
404
405 #undef TARGET_RTX_COSTS
406 #define TARGET_RTX_COSTS arm_rtx_costs
407 #undef TARGET_ADDRESS_COST
408 #define TARGET_ADDRESS_COST arm_address_cost
409
410 #undef TARGET_SHIFT_TRUNCATION_MASK
411 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
412 #undef TARGET_VECTOR_MODE_SUPPORTED_P
413 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
414 #undef TARGET_ARRAY_MODE_SUPPORTED_P
415 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
416 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
417 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
418 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
419 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
420 arm_autovectorize_vector_sizes
421
422 #undef TARGET_MACHINE_DEPENDENT_REORG
423 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
424
425 #undef TARGET_INIT_BUILTINS
426 #define TARGET_INIT_BUILTINS arm_init_builtins
427 #undef TARGET_EXPAND_BUILTIN
428 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
429 #undef TARGET_BUILTIN_DECL
430 #define TARGET_BUILTIN_DECL arm_builtin_decl
431
432 #undef TARGET_INIT_LIBFUNCS
433 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
434
435 #undef TARGET_PROMOTE_FUNCTION_MODE
436 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
437 #undef TARGET_PROMOTE_PROTOTYPES
438 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
439 #undef TARGET_PASS_BY_REFERENCE
440 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
441 #undef TARGET_ARG_PARTIAL_BYTES
442 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
443 #undef TARGET_FUNCTION_ARG
444 #define TARGET_FUNCTION_ARG arm_function_arg
445 #undef TARGET_FUNCTION_ARG_ADVANCE
446 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
447 #undef TARGET_FUNCTION_ARG_BOUNDARY
448 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
449
450 #undef TARGET_SETUP_INCOMING_VARARGS
451 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
452
453 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
454 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
455
456 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
457 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
458 #undef TARGET_TRAMPOLINE_INIT
459 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
460 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
461 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
462
463 #undef TARGET_WARN_FUNC_RETURN
464 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
465
466 #undef TARGET_DEFAULT_SHORT_ENUMS
467 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
468
469 #undef TARGET_ALIGN_ANON_BITFIELD
470 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
471
472 #undef TARGET_NARROW_VOLATILE_BITFIELD
473 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
474
475 #undef TARGET_CXX_GUARD_TYPE
476 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
477
478 #undef TARGET_CXX_GUARD_MASK_BIT
479 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
480
481 #undef TARGET_CXX_GET_COOKIE_SIZE
482 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
483
484 #undef TARGET_CXX_COOKIE_HAS_SIZE
485 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
486
487 #undef TARGET_CXX_CDTOR_RETURNS_THIS
488 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
489
490 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
491 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
492
493 #undef TARGET_CXX_USE_AEABI_ATEXIT
494 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
495
496 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
497 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
498 arm_cxx_determine_class_data_visibility
499
500 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
501 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
502
503 #undef TARGET_RETURN_IN_MSB
504 #define TARGET_RETURN_IN_MSB arm_return_in_msb
505
506 #undef TARGET_RETURN_IN_MEMORY
507 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
508
509 #undef TARGET_MUST_PASS_IN_STACK
510 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
511
512 #if ARM_UNWIND_INFO
513 #undef TARGET_ASM_UNWIND_EMIT
514 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
515
516 /* EABI unwinding tables use a different format for the typeinfo tables. */
517 #undef TARGET_ASM_TTYPE
518 #define TARGET_ASM_TTYPE arm_output_ttype
519
520 #undef TARGET_ARM_EABI_UNWINDER
521 #define TARGET_ARM_EABI_UNWINDER true
522
523 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
524 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
525
526 #undef TARGET_ASM_INIT_SECTIONS
527 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
528 #endif /* ARM_UNWIND_INFO */
529
530 #undef TARGET_DWARF_REGISTER_SPAN
531 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
532
533 #undef TARGET_CANNOT_COPY_INSN_P
534 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
535
536 #ifdef HAVE_AS_TLS
537 #undef TARGET_HAVE_TLS
538 #define TARGET_HAVE_TLS true
539 #endif
540
541 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
542 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
543
544 #undef TARGET_LEGITIMATE_CONSTANT_P
545 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
546
547 #undef TARGET_CANNOT_FORCE_CONST_MEM
548 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
549
550 #undef TARGET_MAX_ANCHOR_OFFSET
551 #define TARGET_MAX_ANCHOR_OFFSET 4095
552
553 /* The minimum is set such that the total size of the block
554    for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
555    divisible by eight, ensuring natural spacing of anchors. */
556 #undef TARGET_MIN_ANCHOR_OFFSET
557 #define TARGET_MIN_ANCHOR_OFFSET -4088
558
559 #undef TARGET_SCHED_ISSUE_RATE
560 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
561
562 #undef TARGET_MANGLE_TYPE
563 #define TARGET_MANGLE_TYPE arm_mangle_type
564
565 #undef TARGET_BUILD_BUILTIN_VA_LIST
566 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
567 #undef TARGET_EXPAND_BUILTIN_VA_START
568 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
569 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
570 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
571
572 #ifdef HAVE_AS_TLS
573 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
574 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
575 #endif
576
577 #undef TARGET_LEGITIMATE_ADDRESS_P
578 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
579
580 #undef TARGET_PREFERRED_RELOAD_CLASS
581 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
582
583 #undef TARGET_INVALID_PARAMETER_TYPE
584 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
585
586 #undef TARGET_INVALID_RETURN_TYPE
587 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
588
589 #undef TARGET_PROMOTED_TYPE
590 #define TARGET_PROMOTED_TYPE arm_promoted_type
591
592 #undef TARGET_CONVERT_TO_TYPE
593 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
594
595 #undef TARGET_SCALAR_MODE_SUPPORTED_P
596 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
597
598 #undef TARGET_FRAME_POINTER_REQUIRED
599 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
600
601 #undef TARGET_CAN_ELIMINATE
602 #define TARGET_CAN_ELIMINATE arm_can_eliminate
603
604 #undef TARGET_CONDITIONAL_REGISTER_USAGE
605 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
606
607 #undef TARGET_CLASS_LIKELY_SPILLED_P
608 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
609
610 #undef TARGET_VECTOR_ALIGNMENT
611 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
612
613 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
614 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
615 arm_vector_alignment_reachable
616
617 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
618 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
619 arm_builtin_support_vector_misalignment
620
621 #undef TARGET_PREFERRED_RENAME_CLASS
622 #define TARGET_PREFERRED_RENAME_CLASS \
623 arm_preferred_rename_class
624
625 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
626 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
627 arm_vectorize_vec_perm_const_ok
628
629 struct gcc_target targetm = TARGET_INITIALIZER;
630 \f
631 /* Obstack for minipool constant handling. */
632 static struct obstack minipool_obstack;
633 static char * minipool_startobj;
634
635 /* The maximum number of insns skipped which
636 will be conditionalised if possible. */
637 static int max_insns_skipped = 5;
638
639 extern FILE * asm_out_file;
640
641 /* True if we are currently building a constant table. */
642 int making_const_table;
643
644 /* The processor for which instructions should be scheduled. */
645 enum processor_type arm_tune = arm_none;
646
647 /* The current tuning set. */
648 const struct tune_params *current_tune;
649
650 /* Which floating point hardware to schedule for. */
651 int arm_fpu_attr;
652
653 /* Which floating point hardware to use. */
654 const struct arm_fpu_desc *arm_fpu_desc;
655
656 /* Used for Thumb call_via trampolines. */
657 rtx thumb_call_via_label[14];
658 static int thumb_call_reg_needed;
659
660 /* Bit values used to identify processor capabilities. */
661 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
662 #define FL_ARCH3M (1 << 1) /* Extended multiply */
663 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
664 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
665 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
666 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
667 #define FL_THUMB (1 << 6) /* Thumb aware */
668 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
669 #define FL_STRONG (1 << 8) /* StrongARM */
670 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
671 #define FL_XSCALE (1 << 10) /* XScale */
672 /* spare (1 << 11) */
673 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
674 media instructions. */
675 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
676 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
677 Note: ARM6 & 7 derivatives only. */
678 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
679 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
680 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
681 profile. */
682 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
683 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
684 #define FL_NEON (1 << 20) /* Neon instructions. */
685 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
686 architecture. */
687 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
688 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
689 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
690
691 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
692 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
693
694 /* Flags that only affect tuning, not available instructions. */
695 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
696 | FL_CO_PROC)
697
698 #define FL_FOR_ARCH2 FL_NOTM
699 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
700 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
701 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
702 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
703 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
704 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
705 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
706 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
707 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
708 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
709 #define FL_FOR_ARCH6J FL_FOR_ARCH6
710 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
711 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
712 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
713 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
714 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
715 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
716 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
717 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
718 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
719 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
720 #define FL_FOR_ARCH8A (FL_FOR_ARCH7 | FL_ARCH6K | FL_ARCH8 | FL_THUMB_DIV \
721 | FL_ARM_DIV | FL_NOTM)
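/* Worked example of the cumulative definitions above:
   FL_FOR_ARCH5TE == FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4
                     | FL_ARCH5 | FL_ARCH5E | FL_THUMB.  */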
722
723 /* The bits in this mask specify which
724 instructions we are allowed to generate. */
725 static unsigned long insn_flags = 0;
726
727 /* The bits in this mask specify which instruction scheduling options should
728 be used. */
729 static unsigned long tune_flags = 0;
730
731 /* The highest ARM architecture version supported by the
732 target. */
733 enum base_architecture arm_base_arch = BASE_ARCH_0;
734
735 /* The following are used in the arm.md file as equivalents to bits
736 in the above two flag variables. */
737
738 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
739 int arm_arch3m = 0;
740
741 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
742 int arm_arch4 = 0;
743
744 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
745 int arm_arch4t = 0;
746
747 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
748 int arm_arch5 = 0;
749
750 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
751 int arm_arch5e = 0;
752
753 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
754 int arm_arch6 = 0;
755
756 /* Nonzero if this chip supports the ARM 6K extensions. */
757 int arm_arch6k = 0;
758
759 /* Nonzero if instructions present in ARMv6-M can be used. */
760 int arm_arch6m = 0;
761
762 /* Nonzero if this chip supports the ARM 7 extensions. */
763 int arm_arch7 = 0;
764
765 /* Nonzero if instructions not present in the 'M' profile can be used. */
766 int arm_arch_notm = 0;
767
768 /* Nonzero if instructions present in ARMv7E-M can be used. */
769 int arm_arch7em = 0;
770
771 /* Nonzero if instructions present in ARMv8 can be used. */
772 int arm_arch8 = 0;
773
774 /* Nonzero if this chip can benefit from load scheduling. */
775 int arm_ld_sched = 0;
776
777 /* Nonzero if this chip is a StrongARM. */
778 int arm_tune_strongarm = 0;
779
780 /* Nonzero if this chip supports Intel Wireless MMX technology. */
781 int arm_arch_iwmmxt = 0;
782
783 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
784 int arm_arch_iwmmxt2 = 0;
785
786 /* Nonzero if this chip is an XScale. */
787 int arm_arch_xscale = 0;
788
789 /* Nonzero if tuning for XScale. */
790 int arm_tune_xscale = 0;
791
792 /* Nonzero if we want to tune for stores that access the write-buffer.
793 This typically means an ARM6 or ARM7 with MMU or MPU. */
794 int arm_tune_wbuf = 0;
795
796 /* Nonzero if tuning for Cortex-A9. */
797 int arm_tune_cortex_a9 = 0;
798
799 /* Nonzero if generating Thumb instructions. */
800 int thumb_code = 0;
801
802 /* Nonzero if generating Thumb-1 instructions. */
803 int thumb1_code = 0;
804
805 /* Nonzero if we should define __THUMB_INTERWORK__ in the
806 preprocessor.
807    XXX This is a bit of a hack; it's intended to help work around
808    problems in GLD, which doesn't understand that armv5t code is
809    interworking clean. */
810 int arm_cpp_interwork = 0;
811
812 /* Nonzero if chip supports Thumb 2. */
813 int arm_arch_thumb2;
814
815 /* Nonzero if chip supports integer division instruction. */
816 int arm_arch_arm_hwdiv;
817 int arm_arch_thumb_hwdiv;
818
819 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
820 we must report the mode of the memory reference from
821 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
822 enum machine_mode output_memory_reference_mode;
823
824 /* The register number to be used for the PIC offset register. */
825 unsigned arm_pic_register = INVALID_REGNUM;
826
827 /* Set to 1 after arm_reorg has started. Reset at the start of
828    the next function. */
829 static int after_arm_reorg = 0;
830
831 enum arm_pcs arm_pcs_default;
832
833 /* For an explanation of these variables, see final_prescan_insn below. */
834 int arm_ccfsm_state;
835 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
836 enum arm_cond_code arm_current_cc;
837
838 rtx arm_target_insn;
839 int arm_target_label;
840 /* The number of conditionally executed insns, including the current insn. */
841 int arm_condexec_count = 0;
842 /* A bitmask specifying the patterns for the IT block.
843 Zero means do not output an IT block before this insn. */
844 int arm_condexec_mask = 0;
845 /* The number of bits used in arm_condexec_mask. */
846 int arm_condexec_masklen = 0;
847
848 /* The condition codes of the ARM, and the inverse function. */
849 static const char * const arm_condition_codes[] =
850 {
851 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
852 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
853 };
854
855 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
856 int arm_regs_in_sequence[] =
857 {
858 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
859 };
860
861 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
862 #define streq(string1, string2) (strcmp (string1, string2) == 0)
863
864 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
865 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
866 | (1 << PIC_OFFSET_TABLE_REGNUM)))
867 \f
868 /* Initialization code. */
869
870 struct processors
871 {
872 const char *const name;
873 enum processor_type core;
874 const char *arch;
875 enum base_architecture base_arch;
876 const unsigned long flags;
877 const struct tune_params *const tune;
878 };
879
880
881 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
882 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
883 prefetch_slots, \
884 l1_size, \
885 l1_line_size
886
887 const struct tune_params arm_slowmul_tune =
888 {
889 arm_slowmul_rtx_costs,
890 NULL,
891 3, /* Constant limit. */
892 5, /* Max cond insns. */
893 ARM_PREFETCH_NOT_BENEFICIAL,
894 true, /* Prefer constant pool. */
895 arm_default_branch_cost,
896 false /* Prefer LDRD/STRD. */
897 };
898
899 const struct tune_params arm_fastmul_tune =
900 {
901 arm_fastmul_rtx_costs,
902 NULL,
903 1, /* Constant limit. */
904 5, /* Max cond insns. */
905 ARM_PREFETCH_NOT_BENEFICIAL,
906 true, /* Prefer constant pool. */
907 arm_default_branch_cost,
908 false /* Prefer LDRD/STRD. */
909 };
910
911 /* StrongARM has early execution of branches, so a sequence that is worth
912 skipping is shorter. Set max_insns_skipped to a lower value. */
913
914 const struct tune_params arm_strongarm_tune =
915 {
916 arm_fastmul_rtx_costs,
917 NULL,
918 1, /* Constant limit. */
919 3, /* Max cond insns. */
920 ARM_PREFETCH_NOT_BENEFICIAL,
921 true, /* Prefer constant pool. */
922 arm_default_branch_cost,
923 false /* Prefer LDRD/STRD. */
924 };
925
926 const struct tune_params arm_xscale_tune =
927 {
928 arm_xscale_rtx_costs,
929 xscale_sched_adjust_cost,
930 2, /* Constant limit. */
931 3, /* Max cond insns. */
932 ARM_PREFETCH_NOT_BENEFICIAL,
933 true, /* Prefer constant pool. */
934 arm_default_branch_cost,
935 false /* Prefer LDRD/STRD. */
936 };
937
938 const struct tune_params arm_9e_tune =
939 {
940 arm_9e_rtx_costs,
941 NULL,
942 1, /* Constant limit. */
943 5, /* Max cond insns. */
944 ARM_PREFETCH_NOT_BENEFICIAL,
945 true, /* Prefer constant pool. */
946 arm_default_branch_cost,
947 false /* Prefer LDRD/STRD. */
948 };
949
950 const struct tune_params arm_v6t2_tune =
951 {
952 arm_9e_rtx_costs,
953 NULL,
954 1, /* Constant limit. */
955 5, /* Max cond insns. */
956 ARM_PREFETCH_NOT_BENEFICIAL,
957 false, /* Prefer constant pool. */
958 arm_default_branch_cost,
959 false /* Prefer LDRD/STRD. */
960 };
961
962 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
963 const struct tune_params arm_cortex_tune =
964 {
965 arm_9e_rtx_costs,
966 NULL,
967 1, /* Constant limit. */
968 5, /* Max cond insns. */
969 ARM_PREFETCH_NOT_BENEFICIAL,
970 false, /* Prefer constant pool. */
971 arm_default_branch_cost,
972 false /* Prefer LDRD/STRD. */
973 };
974
975 const struct tune_params arm_cortex_a15_tune =
976 {
977 arm_9e_rtx_costs,
978 NULL,
979 1, /* Constant limit. */
980 5, /* Max cond insns. */
981 ARM_PREFETCH_NOT_BENEFICIAL,
982 false, /* Prefer constant pool. */
983 arm_default_branch_cost,
984 true /* Prefer LDRD/STRD. */
985 };
986
987 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
988 less appealing. Set max_insns_skipped to a low value. */
989
990 const struct tune_params arm_cortex_a5_tune =
991 {
992 arm_9e_rtx_costs,
993 NULL,
994 1, /* Constant limit. */
995 1, /* Max cond insns. */
996 ARM_PREFETCH_NOT_BENEFICIAL,
997 false, /* Prefer constant pool. */
998 arm_cortex_a5_branch_cost,
999 false /* Prefer LDRD/STRD. */
1000 };
1001
1002 const struct tune_params arm_cortex_a9_tune =
1003 {
1004 arm_9e_rtx_costs,
1005 cortex_a9_sched_adjust_cost,
1006 1, /* Constant limit. */
1007 5, /* Max cond insns. */
1008 ARM_PREFETCH_BENEFICIAL(4,32,32),
1009 false, /* Prefer constant pool. */
1010 arm_default_branch_cost,
1011 false /* Prefer LDRD/STRD. */
1012 };
1013
1014 const struct tune_params arm_fa726te_tune =
1015 {
1016 arm_9e_rtx_costs,
1017 fa726te_sched_adjust_cost,
1018 1, /* Constant limit. */
1019 5, /* Max cond insns. */
1020 ARM_PREFETCH_NOT_BENEFICIAL,
1021 true, /* Prefer constant pool. */
1022 arm_default_branch_cost,
1023 false /* Prefer LDRD/STRD. */
1024 };
1025
1026
1027 /* Not all of these give usefully different compilation alternatives,
1028 but there is no simple way of generalizing them. */
1029 static const struct processors all_cores[] =
1030 {
1031 /* ARM Cores */
1032 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
1033 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1034 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1035 #include "arm-cores.def"
1036 #undef ARM_CORE
1037 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1038 };
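/* Illustrative expansion of the ARM_CORE macro above (the entry shown is an
   example, not copied from arm-cores.def):
     ARM_CORE("cortex-a9", cortexa9, 7A, FLAGS, cortex_a9)
   would expand to
     {"cortex-a9", cortexa9, "7A", BASE_ARCH_7A,
      FLAGS | FL_FOR_ARCH7A, &arm_cortex_a9_tune},
   i.e. the core's own flags are OR-ed with those of its base architecture
   and its tuning table is selected by name.  */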
1039
1040 static const struct processors all_architectures[] =
1041 {
1042 /* ARM Architectures */
1043 /* We don't specify tuning costs here as it will be figured out
1044 from the core. */
1045
1046 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1047 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1048 #include "arm-arches.def"
1049 #undef ARM_ARCH
1050 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1051 };
1052
1053
1054 /* These are populated as command-line arguments are processed, or NULL
1055 if not specified. */
1056 static const struct processors *arm_selected_arch;
1057 static const struct processors *arm_selected_cpu;
1058 static const struct processors *arm_selected_tune;
1059
1060 /* The name of the preprocessor macro to define for this architecture. */
1061
1062 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1063
1064 /* Available values for -mfpu=. */
1065
1066 static const struct arm_fpu_desc all_fpus[] =
1067 {
1068 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1069 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1070 #include "arm-fpus.def"
1071 #undef ARM_FPU
1072 };
1073
1074
1075 /* Supported TLS relocations. */
1076
1077 enum tls_reloc {
1078 TLS_GD32,
1079 TLS_LDM32,
1080 TLS_LDO32,
1081 TLS_IE32,
1082 TLS_LE32,
1083 TLS_DESCSEQ /* GNU scheme */
1084 };
1085
1086 /* The maximum number of insns to be used when loading a constant. */
1087 inline static int
1088 arm_constant_limit (bool size_p)
1089 {
1090 return size_p ? 1 : current_tune->constant_limit;
1091 }
1092
1093 /* Emit an insn that's a simple single-set. Both the operands must be known
1094 to be valid. */
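/* For example, emit_set_insn (reg, GEN_INT (0)) emits the single insn
   (set reg (const_int 0)).  */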
1095 inline static rtx
1096 emit_set_insn (rtx x, rtx y)
1097 {
1098 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1099 }
1100
1101 /* Return the number of bits set in VALUE. */
1102 static unsigned
1103 bit_count (unsigned long value)
1104 {
1105 unsigned long count = 0;
1106
1107 while (value)
1108 {
1109 count++;
1110 value &= value - 1; /* Clear the least-significant set bit. */
1111 }
1112
1113 return count;
1114 }
1115
1116 typedef struct
1117 {
1118 enum machine_mode mode;
1119 const char *name;
1120 } arm_fixed_mode_set;
1121
1122 /* A small helper for setting fixed-point libfuncs. */
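/* For example, arm_set_fixed_optab_libfunc (ssadd_optab, QQmode, "ssadd",
   "qq", 3) registers the libcall name "__gnu_ssaddqq3" for saturating
   QQmode addition.  */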
1123
1124 static void
1125 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1126 const char *funcname, const char *modename,
1127 int num_suffix)
1128 {
1129 char buffer[50];
1130
1131 if (num_suffix == 0)
1132 sprintf (buffer, "__gnu_%s%s", funcname, modename);
1133 else
1134 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
1135
1136 set_optab_libfunc (optable, mode, buffer);
1137 }
1138
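/* Likewise for fixed-point conversion libfuncs: e.g. a "fract" conversion
   from SQmode ("sq") to SImode ("si") is registered as "__gnu_fractsqsi",
   and a "2" suffix is appended for fixed-to-fixed conversions that keep
   the same signedness and fract/accum class, mirroring fixed-bit.h.  */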
1139 static void
1140 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
1141 enum machine_mode from, const char *funcname,
1142 const char *toname, const char *fromname)
1143 {
1144 char buffer[50];
1145 const char *maybe_suffix_2 = "";
1146
1147 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1148 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
1149 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
1150 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
1151 maybe_suffix_2 = "2";
1152
1153 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
1154 maybe_suffix_2);
1155
1156 set_conv_libfunc (optable, to, from, buffer);
1157 }
1158
1159 /* Set up library functions unique to ARM. */
1160
1161 static void
1162 arm_init_libfuncs (void)
1163 {
1164 /* For Linux, we have access to kernel support for atomic operations. */
1165 if (arm_abi == ARM_ABI_AAPCS_LINUX)
1166 init_sync_libfuncs (2 * UNITS_PER_WORD);
1167
1168 /* There are no special library functions unless we are using the
1169 ARM BPABI. */
1170 if (!TARGET_BPABI)
1171 return;
1172
1173 /* The functions below are described in Section 4 of the "Run-Time
1174 ABI for the ARM architecture", Version 1.0. */
1175
1176 /* Double-precision floating-point arithmetic. Table 2. */
1177 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1178 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1179 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1180 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1181 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1182
1183 /* Double-precision comparisons. Table 3. */
1184 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1185 set_optab_libfunc (ne_optab, DFmode, NULL);
1186 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1187 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1188 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1189 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1190 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1191
1192 /* Single-precision floating-point arithmetic. Table 4. */
1193 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1194 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1195 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1196 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1197 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1198
1199 /* Single-precision comparisons. Table 5. */
1200 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1201 set_optab_libfunc (ne_optab, SFmode, NULL);
1202 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1203 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1204 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1205 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1206 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1207
1208 /* Floating-point to integer conversions. Table 6. */
1209 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1210 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1211 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1212 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1213 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1214 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1215 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1216 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1217
1218 /* Conversions between floating types. Table 7. */
1219 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1220 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1221
1222 /* Integer to floating-point conversions. Table 8. */
1223 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1224 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1225 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1226 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1227 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1228 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1229 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1230 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1231
1232 /* Long long. Table 9. */
1233 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1234 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1235 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1236 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1237 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1238 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1239 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1240 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1241
1242 /* Integer (32/32->32) division. \S 4.3.1. */
1243 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1244 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1245
1246 /* The divmod functions are designed so that they can be used for
1247 plain division, even though they return both the quotient and the
1248 remainder. The quotient is returned in the usual location (i.e.,
1249 r0 for SImode, {r0, r1} for DImode), just as would be expected
1250 for an ordinary division routine. Because the AAPCS calling
1251 conventions specify that all of { r0, r1, r2, r3 } are
1252    call-clobbered registers, there is no need to tell the compiler
1253 explicitly that those registers are clobbered by these
1254 routines. */
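  /* Concretely, __aeabi_idivmod takes the numerator in r0 and the
     denominator in r1, and returns the quotient in r0 and the remainder
     in r1, so a caller interested only in the quotient simply ignores r1.  */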
1255 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1256 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1257
1258 /* For SImode division the ABI provides div-without-mod routines,
1259 which are faster. */
1260 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1261 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1262
1263 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1264 divmod libcalls instead. */
1265 set_optab_libfunc (smod_optab, DImode, NULL);
1266 set_optab_libfunc (umod_optab, DImode, NULL);
1267 set_optab_libfunc (smod_optab, SImode, NULL);
1268 set_optab_libfunc (umod_optab, SImode, NULL);
1269
1270 /* Half-precision float operations. The compiler handles all operations
1271    with NULL libfuncs by converting to SFmode. */
1272 switch (arm_fp16_format)
1273 {
1274 case ARM_FP16_FORMAT_IEEE:
1275 case ARM_FP16_FORMAT_ALTERNATIVE:
1276
1277 /* Conversions. */
1278 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1279 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1280 ? "__gnu_f2h_ieee"
1281 : "__gnu_f2h_alternative"));
1282 set_conv_libfunc (sext_optab, SFmode, HFmode,
1283 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1284 ? "__gnu_h2f_ieee"
1285 : "__gnu_h2f_alternative"));
1286
1287 /* Arithmetic. */
1288 set_optab_libfunc (add_optab, HFmode, NULL);
1289 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1290 set_optab_libfunc (smul_optab, HFmode, NULL);
1291 set_optab_libfunc (neg_optab, HFmode, NULL);
1292 set_optab_libfunc (sub_optab, HFmode, NULL);
1293
1294 /* Comparisons. */
1295 set_optab_libfunc (eq_optab, HFmode, NULL);
1296 set_optab_libfunc (ne_optab, HFmode, NULL);
1297 set_optab_libfunc (lt_optab, HFmode, NULL);
1298 set_optab_libfunc (le_optab, HFmode, NULL);
1299 set_optab_libfunc (ge_optab, HFmode, NULL);
1300 set_optab_libfunc (gt_optab, HFmode, NULL);
1301 set_optab_libfunc (unord_optab, HFmode, NULL);
1302 break;
1303
1304 default:
1305 break;
1306 }
1307
1308 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
1309 {
1310 const arm_fixed_mode_set fixed_arith_modes[] =
1311 {
1312 { QQmode, "qq" },
1313 { UQQmode, "uqq" },
1314 { HQmode, "hq" },
1315 { UHQmode, "uhq" },
1316 { SQmode, "sq" },
1317 { USQmode, "usq" },
1318 { DQmode, "dq" },
1319 { UDQmode, "udq" },
1320 { TQmode, "tq" },
1321 { UTQmode, "utq" },
1322 { HAmode, "ha" },
1323 { UHAmode, "uha" },
1324 { SAmode, "sa" },
1325 { USAmode, "usa" },
1326 { DAmode, "da" },
1327 { UDAmode, "uda" },
1328 { TAmode, "ta" },
1329 { UTAmode, "uta" }
1330 };
1331 const arm_fixed_mode_set fixed_conv_modes[] =
1332 {
1333 { QQmode, "qq" },
1334 { UQQmode, "uqq" },
1335 { HQmode, "hq" },
1336 { UHQmode, "uhq" },
1337 { SQmode, "sq" },
1338 { USQmode, "usq" },
1339 { DQmode, "dq" },
1340 { UDQmode, "udq" },
1341 { TQmode, "tq" },
1342 { UTQmode, "utq" },
1343 { HAmode, "ha" },
1344 { UHAmode, "uha" },
1345 { SAmode, "sa" },
1346 { USAmode, "usa" },
1347 { DAmode, "da" },
1348 { UDAmode, "uda" },
1349 { TAmode, "ta" },
1350 { UTAmode, "uta" },
1351 { QImode, "qi" },
1352 { HImode, "hi" },
1353 { SImode, "si" },
1354 { DImode, "di" },
1355 { TImode, "ti" },
1356 { SFmode, "sf" },
1357 { DFmode, "df" }
1358 };
1359 unsigned int i, j;
1360
1361 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
1362 {
1363 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
1364 "add", fixed_arith_modes[i].name, 3);
1365 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
1366 "ssadd", fixed_arith_modes[i].name, 3);
1367 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
1368 "usadd", fixed_arith_modes[i].name, 3);
1369 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
1370 "sub", fixed_arith_modes[i].name, 3);
1371 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
1372 "sssub", fixed_arith_modes[i].name, 3);
1373 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
1374 "ussub", fixed_arith_modes[i].name, 3);
1375 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
1376 "mul", fixed_arith_modes[i].name, 3);
1377 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
1378 "ssmul", fixed_arith_modes[i].name, 3);
1379 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
1380 "usmul", fixed_arith_modes[i].name, 3);
1381 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
1382 "div", fixed_arith_modes[i].name, 3);
1383 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
1384 "udiv", fixed_arith_modes[i].name, 3);
1385 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
1386 "ssdiv", fixed_arith_modes[i].name, 3);
1387 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
1388 "usdiv", fixed_arith_modes[i].name, 3);
1389 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
1390 "neg", fixed_arith_modes[i].name, 2);
1391 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
1392 "ssneg", fixed_arith_modes[i].name, 2);
1393 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
1394 "usneg", fixed_arith_modes[i].name, 2);
1395 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
1396 "ashl", fixed_arith_modes[i].name, 3);
1397 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
1398 "ashr", fixed_arith_modes[i].name, 3);
1399 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
1400 "lshr", fixed_arith_modes[i].name, 3);
1401 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
1402 "ssashl", fixed_arith_modes[i].name, 3);
1403 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
1404 "usashl", fixed_arith_modes[i].name, 3);
1405 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
1406 "cmp", fixed_arith_modes[i].name, 2);
1407 }
1408
1409 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
1410 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
1411 {
1412 if (i == j
1413 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
1414 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
1415 continue;
1416
1417 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
1418 fixed_conv_modes[j].mode, "fract",
1419 fixed_conv_modes[i].name,
1420 fixed_conv_modes[j].name);
1421 arm_set_fixed_conv_libfunc (satfract_optab,
1422 fixed_conv_modes[i].mode,
1423 fixed_conv_modes[j].mode, "satfract",
1424 fixed_conv_modes[i].name,
1425 fixed_conv_modes[j].name);
1426 arm_set_fixed_conv_libfunc (fractuns_optab,
1427 fixed_conv_modes[i].mode,
1428 fixed_conv_modes[j].mode, "fractuns",
1429 fixed_conv_modes[i].name,
1430 fixed_conv_modes[j].name);
1431 arm_set_fixed_conv_libfunc (satfractuns_optab,
1432 fixed_conv_modes[i].mode,
1433 fixed_conv_modes[j].mode, "satfractuns",
1434 fixed_conv_modes[i].name,
1435 fixed_conv_modes[j].name);
1436 }
1437 }
1438
1439 if (TARGET_AAPCS_BASED)
1440 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1441 }
1442
1443 /* On AAPCS systems, this is the "struct __va_list". */
1444 static GTY(()) tree va_list_type;
1445
1446 /* Return the type to use as __builtin_va_list. */
1447 static tree
1448 arm_build_builtin_va_list (void)
1449 {
1450 tree va_list_name;
1451 tree ap_field;
1452
1453 if (!TARGET_AAPCS_BASED)
1454 return std_build_builtin_va_list ();
1455
1456 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1457 defined as:
1458
1459 struct __va_list
1460 {
1461 void *__ap;
1462 };
1463
1464 The C Library ABI further reinforces this definition in \S
1465 4.1.
1466
1467 We must follow this definition exactly. The structure tag
1468 name is visible in C++ mangled names, and thus forms a part
1469 of the ABI. The field name may be used by people who
1470 #include <stdarg.h>. */
1471 /* Create the type. */
1472 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1473 /* Give it the required name. */
1474 va_list_name = build_decl (BUILTINS_LOCATION,
1475 TYPE_DECL,
1476 get_identifier ("__va_list"),
1477 va_list_type);
1478 DECL_ARTIFICIAL (va_list_name) = 1;
1479 TYPE_NAME (va_list_type) = va_list_name;
1480 TYPE_STUB_DECL (va_list_type) = va_list_name;
1481 /* Create the __ap field. */
1482 ap_field = build_decl (BUILTINS_LOCATION,
1483 FIELD_DECL,
1484 get_identifier ("__ap"),
1485 ptr_type_node);
1486 DECL_ARTIFICIAL (ap_field) = 1;
1487 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1488 TYPE_FIELDS (va_list_type) = ap_field;
1489 /* Compute its layout. */
1490 layout_type (va_list_type);
1491
1492 return va_list_type;
1493 }
1494
1495 /* Return an expression of type "void *" pointing to the next
1496 available argument in a variable-argument list. VALIST is the
1497 user-level va_list object, of type __builtin_va_list. */
1498 static tree
1499 arm_extract_valist_ptr (tree valist)
1500 {
1501 if (TREE_TYPE (valist) == error_mark_node)
1502 return error_mark_node;
1503
1504 /* On an AAPCS target, the pointer is stored within "struct
1505 va_list". */
1506 if (TARGET_AAPCS_BASED)
1507 {
1508 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1509 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1510 valist, ap_field, NULL_TREE);
1511 }
1512
1513 return valist;
1514 }
1515
1516 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1517 static void
1518 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1519 {
1520 valist = arm_extract_valist_ptr (valist);
1521 std_expand_builtin_va_start (valist, nextarg);
1522 }
1523
1524 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1525 static tree
1526 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1527 gimple_seq *post_p)
1528 {
1529 valist = arm_extract_valist_ptr (valist);
1530 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1531 }
1532
1533 /* Fix up any incompatible options that the user has specified. */
1534 static void
1535 arm_option_override (void)
1536 {
1537 if (global_options_set.x_arm_arch_option)
1538 arm_selected_arch = &all_architectures[arm_arch_option];
1539
1540 if (global_options_set.x_arm_cpu_option)
1541 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
1542
1543 if (global_options_set.x_arm_tune_option)
1544 arm_selected_tune = &all_cores[(int) arm_tune_option];
1545
1546 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1547 SUBTARGET_OVERRIDE_OPTIONS;
1548 #endif
1549
1550 if (arm_selected_arch)
1551 {
1552 if (arm_selected_cpu)
1553 {
1554 /* Check for conflict between mcpu and march. */
1555 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1556 {
1557 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1558 arm_selected_cpu->name, arm_selected_arch->name);
1559 /* -march wins for code generation.
1560 -mcpu wins for default tuning. */
1561 if (!arm_selected_tune)
1562 arm_selected_tune = arm_selected_cpu;
1563
1564 arm_selected_cpu = arm_selected_arch;
1565 }
1566 else
1567 /* -mcpu wins. */
1568 arm_selected_arch = NULL;
1569 }
1570 else
1571 /* Pick a CPU based on the architecture. */
1572 arm_selected_cpu = arm_selected_arch;
1573 }
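/* Illustrative example: "-mcpu=arm926ej-s -march=armv6" triggers the
   conflict warning above; ARMv6 is then used for code generation while
   the arm926ej-s entry still supplies the default tuning.  */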
1574
1575 /* If the user did not specify a processor, choose one for them. */
1576 if (!arm_selected_cpu)
1577 {
1578 const struct processors * sel;
1579 unsigned int sought;
1580
1581 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1582 if (!arm_selected_cpu->name)
1583 {
1584 #ifdef SUBTARGET_CPU_DEFAULT
1585 /* Use the subtarget default CPU if none was specified by
1586 configure. */
1587 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1588 #endif
1589 /* Default to ARM6. */
1590 if (!arm_selected_cpu->name)
1591 arm_selected_cpu = &all_cores[arm6];
1592 }
1593
1594 sel = arm_selected_cpu;
1595 insn_flags = sel->flags;
1596
1597 /* Now check to see if the user has specified some command line
1598 switches that require certain abilities from the cpu. */
1599 sought = 0;
1600
1601 if (TARGET_INTERWORK || TARGET_THUMB)
1602 {
1603 sought |= (FL_THUMB | FL_MODE32);
1604
1605 /* There are no ARM processors that support both APCS-26 and
1606 interworking. Therefore we force FL_MODE26 to be removed
1607 from insn_flags here (if it was set), so that the search
1608 below will always be able to find a compatible processor. */
1609 insn_flags &= ~FL_MODE26;
1610 }
1611
1612 if (sought != 0 && ((sought & insn_flags) != sought))
1613 {
1614 /* Try to locate a CPU type that supports all of the abilities
1615 of the default CPU, plus the extra abilities requested by
1616 the user. */
1617 for (sel = all_cores; sel->name != NULL; sel++)
1618 if ((sel->flags & sought) == (sought | insn_flags))
1619 break;
1620
1621 if (sel->name == NULL)
1622 {
1623 unsigned current_bit_count = 0;
1624 const struct processors * best_fit = NULL;
1625
1626 /* Ideally we would like to issue an error message here
1627 saying that it was not possible to find a CPU compatible
1628 with the default CPU, but which also supports the command
1629 line options specified by the programmer, and so they
1630 ought to use the -mcpu=<name> command line option to
1631 override the default CPU type.
1632
1633 If we cannot find a cpu that has both the
1634 characteristics of the default cpu and the given
1635 command line options, we scan the array again looking
1636 for a best match. */
1637 for (sel = all_cores; sel->name != NULL; sel++)
1638 if ((sel->flags & sought) == sought)
1639 {
1640 unsigned count;
1641
1642 count = bit_count (sel->flags & insn_flags);
1643
1644 if (count >= current_bit_count)
1645 {
1646 best_fit = sel;
1647 current_bit_count = count;
1648 }
1649 }
1650
1651 gcc_assert (best_fit);
1652 sel = best_fit;
1653 }
1654
1655 arm_selected_cpu = sel;
1656 }
1657 }
1658
1659 gcc_assert (arm_selected_cpu);
1660 /* The selected cpu may be an architecture, so look up tuning by core ID. */
1661 if (!arm_selected_tune)
1662 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1663
1664 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1665 insn_flags = arm_selected_cpu->flags;
1666 arm_base_arch = arm_selected_cpu->base_arch;
1667
1668 arm_tune = arm_selected_tune->core;
1669 tune_flags = arm_selected_tune->flags;
1670 current_tune = arm_selected_tune->tune;
1671
1672 /* Make sure that the processor choice does not conflict with any of the
1673 other command line choices. */
1674 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1675 error ("target CPU does not support ARM mode");
1676
1677 /* BPABI targets use linker tricks to allow interworking on cores
1678 without thumb support. */
1679 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1680 {
1681 warning (0, "target CPU does not support interworking");
1682 target_flags &= ~MASK_INTERWORK;
1683 }
1684
1685 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1686 {
1687 warning (0, "target CPU does not support THUMB instructions");
1688 target_flags &= ~MASK_THUMB;
1689 }
1690
1691 if (TARGET_APCS_FRAME && TARGET_THUMB)
1692 {
1693 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1694 target_flags &= ~MASK_APCS_FRAME;
1695 }
1696
1697 /* Callee super interworking implies thumb interworking. Adding
1698 this to the flags here simplifies the logic elsewhere. */
1699 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1700 target_flags |= MASK_INTERWORK;
1701
1702 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1703 from here where no function is being compiled currently. */
1704 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1705 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1706
1707 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1708 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1709
1710 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1711 {
1712 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1713 target_flags |= MASK_APCS_FRAME;
1714 }
1715
1716 if (TARGET_POKE_FUNCTION_NAME)
1717 target_flags |= MASK_APCS_FRAME;
1718
1719 if (TARGET_APCS_REENT && flag_pic)
1720 error ("-fpic and -mapcs-reent are incompatible");
1721
1722 if (TARGET_APCS_REENT)
1723 warning (0, "APCS reentrant code not supported. Ignored");
1724
1725 /* If this target is normally configured to use APCS frames, warn if they
1726 are turned off and debugging is turned on. */
1727 if (TARGET_ARM
1728 && write_symbols != NO_DEBUG
1729 && !TARGET_APCS_FRAME
1730 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1731 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1732
1733 if (TARGET_APCS_FLOAT)
1734 warning (0, "passing floating point arguments in fp regs not yet supported");
1735
1736 if (TARGET_LITTLE_WORDS)
1737 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
1738 "will be removed in a future release");
1739
1740 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1741 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1742 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1743 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1744 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1745 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1746 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1747 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1748 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1749 arm_arch6m = arm_arch6 && !arm_arch_notm;
1750 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1751 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1752 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
1753 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1754 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1755
1756 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1757 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1758 thumb_code = TARGET_ARM == 0;
1759 thumb1_code = TARGET_THUMB1 != 0;
1760 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1761 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1762 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1763 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
1764 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1765 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1766 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1767
1768 /* If we are not using the default (ARM mode) section anchor offset
1769 ranges, then set the correct ranges now. */
1770 if (TARGET_THUMB1)
1771 {
1772 /* Thumb-1 LDR instructions cannot have negative offsets.
1773 Permissible positive offset ranges are 5-bit (for byte loads),
1774 6-bit (for halfword loads), or 7-bit (for word loads).
1775 Empirical results suggest a 7-bit anchor range gives the best
1776 overall code size. */
1777 targetm.min_anchor_offset = 0;
1778 targetm.max_anchor_offset = 127;
1779 }
1780 else if (TARGET_THUMB2)
1781 {
1782 /* The minimum is set such that the total size of the block
1783 for a particular anchor is 248 + 1 + 4095 bytes, which is
1784 divisible by eight, ensuring natural spacing of anchors. */
1785 targetm.min_anchor_offset = -248;
1786 targetm.max_anchor_offset = 4095;
1787 }
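/* Illustrative arithmetic: the block spans offsets -248 through 4095
   inclusive, i.e. 248 + 1 + 4095 = 4344 bytes = 543 * 8.  */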
1788
1789 /* V5 code we generate is completely interworking capable, so we turn off
1790 TARGET_INTERWORK here to avoid many tests later on. */
1791
1792 /* XXX However, we must pass the right pre-processor defines to CPP
1793 or GLD can get confused. This is a hack. */
1794 if (TARGET_INTERWORK)
1795 arm_cpp_interwork = 1;
1796
1797 if (arm_arch5)
1798 target_flags &= ~MASK_INTERWORK;
1799
1800 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1801 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1802
1803 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1804 error ("iwmmxt abi requires an iwmmxt capable cpu");
1805
1806 if (!global_options_set.x_arm_fpu_index)
1807 {
1808 const char *target_fpu_name;
1809 bool ok;
1810
1811 #ifdef FPUTYPE_DEFAULT
1812 target_fpu_name = FPUTYPE_DEFAULT;
1813 #else
1814 target_fpu_name = "vfp";
1815 #endif
1816
1817 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1818 CL_TARGET);
1819 gcc_assert (ok);
1820 }
1821
1822 arm_fpu_desc = &all_fpus[arm_fpu_index];
1823
1824 switch (arm_fpu_desc->model)
1825 {
1826 case ARM_FP_MODEL_VFP:
1827 arm_fpu_attr = FPU_VFP;
1828 break;
1829
1830 default:
1831 gcc_unreachable();
1832 }
1833
1834 if (TARGET_AAPCS_BASED)
1835 {
1836 if (TARGET_CALLER_INTERWORKING)
1837 error ("AAPCS does not support -mcaller-super-interworking");
1838 else
1839 if (TARGET_CALLEE_INTERWORKING)
1840 error ("AAPCS does not support -mcallee-super-interworking");
1841 }
1842
1843 /* iWMMXt and NEON are incompatible. */
1844 if (TARGET_IWMMXT && TARGET_NEON)
1845 error ("iWMMXt and NEON are incompatible");
1846
1847 /* iWMMXt unsupported under Thumb mode. */
1848 if (TARGET_THUMB && TARGET_IWMMXT)
1849 error ("iWMMXt unsupported under Thumb mode");
1850
1851 /* __fp16 support currently assumes the core has ldrh. */
1852 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1853 sorry ("__fp16 and no ldrh");
1854
1855 /* If soft-float is specified then don't use FPU. */
1856 if (TARGET_SOFT_FLOAT)
1857 arm_fpu_attr = FPU_NONE;
1858
1859 if (TARGET_AAPCS_BASED)
1860 {
1861 if (arm_abi == ARM_ABI_IWMMXT)
1862 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1863 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1864 && TARGET_HARD_FLOAT
1865 && TARGET_VFP)
1866 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1867 else
1868 arm_pcs_default = ARM_PCS_AAPCS;
1869 }
1870 else
1871 {
1872 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1873 sorry ("-mfloat-abi=hard and VFP");
1874
1875 if (arm_abi == ARM_ABI_APCS)
1876 arm_pcs_default = ARM_PCS_APCS;
1877 else
1878 arm_pcs_default = ARM_PCS_ATPCS;
1879 }
1880
1881 /* For arm2/3 there is no need to do any scheduling if we are doing
1882 software floating-point. */
1883 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
1884 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1885
1886 /* Use the cp15 method if it is available. */
1887 if (target_thread_pointer == TP_AUTO)
1888 {
1889 if (arm_arch6k && !TARGET_THUMB1)
1890 target_thread_pointer = TP_CP15;
1891 else
1892 target_thread_pointer = TP_SOFT;
1893 }
1894
1895 if (TARGET_HARD_TP && TARGET_THUMB1)
1896 error ("can not use -mtp=cp15 with 16-bit Thumb");
1897
1898 /* Override the default structure alignment for AAPCS ABI. */
1899 if (!global_options_set.x_arm_structure_size_boundary)
1900 {
1901 if (TARGET_AAPCS_BASED)
1902 arm_structure_size_boundary = 8;
1903 }
1904 else
1905 {
1906 if (arm_structure_size_boundary != 8
1907 && arm_structure_size_boundary != 32
1908 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
1909 {
1910 if (ARM_DOUBLEWORD_ALIGN)
1911 warning (0,
1912 "structure size boundary can only be set to 8, 32 or 64");
1913 else
1914 warning (0, "structure size boundary can only be set to 8 or 32");
1915 arm_structure_size_boundary
1916 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
1917 }
1918 }
1919
1920 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1921 {
1922 error ("RTP PIC is incompatible with Thumb");
1923 flag_pic = 0;
1924 }
1925
1926 /* If stack checking is disabled, we can use r10 as the PIC register,
1927 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1928 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1929 {
1930 if (TARGET_VXWORKS_RTP)
1931 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1932 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1933 }
1934
1935 if (flag_pic && TARGET_VXWORKS_RTP)
1936 arm_pic_register = 9;
1937
1938 if (arm_pic_register_string != NULL)
1939 {
1940 int pic_register = decode_reg_name (arm_pic_register_string);
1941
1942 if (!flag_pic)
1943 warning (0, "-mpic-register= is useless without -fpic");
1944
1945 /* Prevent the user from choosing an obviously stupid PIC register. */
1946 else if (pic_register < 0 || call_used_regs[pic_register]
1947 || pic_register == HARD_FRAME_POINTER_REGNUM
1948 || pic_register == STACK_POINTER_REGNUM
1949 || pic_register >= PC_REGNUM
1950 || (TARGET_VXWORKS_RTP
1951 && (unsigned int) pic_register != arm_pic_register))
1952 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1953 else
1954 arm_pic_register = pic_register;
1955 }
1956
1957 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1958 if (fix_cm3_ldrd == 2)
1959 {
1960 if (arm_selected_cpu->core == cortexm3)
1961 fix_cm3_ldrd = 1;
1962 else
1963 fix_cm3_ldrd = 0;
1964 }
1965
1966 /* Enable -munaligned-access by default for
1967 - all ARMv6 architecture-based processors
1968 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
1969 - ARMv8 architecture-based processors.
1970
1971 Disable -munaligned-access by default for
1972 - all pre-ARMv6 architecture-based processors
1973 - ARMv6-M architecture-based processors. */
1974
1975 if (unaligned_access == 2)
1976 {
1977 if (arm_arch6 && (arm_arch_notm || arm_arch7))
1978 unaligned_access = 1;
1979 else
1980 unaligned_access = 0;
1981 }
1982 else if (unaligned_access == 1
1983 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
1984 {
1985 warning (0, "target CPU does not support unaligned accesses");
1986 unaligned_access = 0;
1987 }
1988
1989 if (TARGET_THUMB1 && flag_schedule_insns)
1990 {
1991 /* Don't warn since it's on by default in -O2. */
1992 flag_schedule_insns = 0;
1993 }
1994
1995 if (optimize_size)
1996 {
1997 /* If optimizing for size, bump the number of instructions that we
1998 are prepared to conditionally execute (even on a StrongARM). */
1999 max_insns_skipped = 6;
2000 }
2001 else
2002 max_insns_skipped = current_tune->max_insns_skipped;
2003
2004 /* Hot/Cold partitioning is not currently supported, since we can't
2005 handle literal pool placement in that case. */
2006 if (flag_reorder_blocks_and_partition)
2007 {
2008 inform (input_location,
2009 "-freorder-blocks-and-partition not supported on this architecture");
2010 flag_reorder_blocks_and_partition = 0;
2011 flag_reorder_blocks = 1;
2012 }
2013
2014 if (flag_pic)
2015 /* Hoisting PIC address calculations more aggressively provides a small,
2016 but measurable, size reduction for PIC code. Therefore, we decrease
2017 the bar for unrestricted expression hoisting to the cost of PIC address
2018 calculation, which is 2 instructions. */
2019 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2020 global_options.x_param_values,
2021 global_options_set.x_param_values);
2022
2023 /* ARM EABI defaults to strict volatile bitfields. */
2024 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2025 && abi_version_at_least(2))
2026 flag_strict_volatile_bitfields = 1;
2027
2028 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we have deemed
2029 it beneficial (signified by setting num_prefetch_slots to 1 or more.) */
2030 if (flag_prefetch_loop_arrays < 0
2031 && HAVE_prefetch
2032 && optimize >= 3
2033 && current_tune->num_prefetch_slots > 0)
2034 flag_prefetch_loop_arrays = 1;
2035
2036 /* Set up parameters to be used in the prefetching algorithm. Do not override the
2037 defaults unless we are tuning for a core we have researched values for. */
2038 if (current_tune->num_prefetch_slots > 0)
2039 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2040 current_tune->num_prefetch_slots,
2041 global_options.x_param_values,
2042 global_options_set.x_param_values);
2043 if (current_tune->l1_cache_line_size >= 0)
2044 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2045 current_tune->l1_cache_line_size,
2046 global_options.x_param_values,
2047 global_options_set.x_param_values);
2048 if (current_tune->l1_cache_size >= 0)
2049 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2050 current_tune->l1_cache_size,
2051 global_options.x_param_values,
2052 global_options_set.x_param_values);
2053
2054 /* Use the alternative scheduling-pressure algorithm by default. */
2055 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2056 global_options.x_param_values,
2057 global_options_set.x_param_values);
2058
2059 /* Register global variables with the garbage collector. */
2060 arm_add_gc_roots ();
2061 }
2062
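/* Initialize the obstack used to hold minipool entries.  Called from
   arm_option_override, above.  */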
2063 static void
2064 arm_add_gc_roots (void)
2065 {
2066 gcc_obstack_init(&minipool_obstack);
2067 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2068 }
2069 \f
2070 /* A table of known ARM exception types.
2071 For use with the interrupt function attribute. */
2072
2073 typedef struct
2074 {
2075 const char *const arg;
2076 const unsigned long return_value;
2077 }
2078 isr_attribute_arg;
2079
2080 static const isr_attribute_arg isr_attribute_args [] =
2081 {
2082 { "IRQ", ARM_FT_ISR },
2083 { "irq", ARM_FT_ISR },
2084 { "FIQ", ARM_FT_FIQ },
2085 { "fiq", ARM_FT_FIQ },
2086 { "ABORT", ARM_FT_ISR },
2087 { "abort", ARM_FT_ISR },
2088 { "ABORT", ARM_FT_ISR },
2089 { "abort", ARM_FT_ISR },
2090 { "UNDEF", ARM_FT_EXCEPTION },
2091 { "undef", ARM_FT_EXCEPTION },
2092 { "SWI", ARM_FT_EXCEPTION },
2093 { "swi", ARM_FT_EXCEPTION },
2094 { NULL, ARM_FT_NORMAL }
2095 };
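/* Illustrative only: these are the strings accepted as the argument of
   the "interrupt" (or "isr") function attribute, e.g.

       void handler (void) __attribute__ ((interrupt ("IRQ")));

   Omitting the argument defaults to ARM_FT_ISR, as arm_isr_value below
   shows.  */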
2096
2097 /* Returns the (interrupt) function type of the current
2098 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2099
2100 static unsigned long
2101 arm_isr_value (tree argument)
2102 {
2103 const isr_attribute_arg * ptr;
2104 const char * arg;
2105
2106 if (!arm_arch_notm)
2107 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2108
2109 /* No argument - default to IRQ. */
2110 if (argument == NULL_TREE)
2111 return ARM_FT_ISR;
2112
2113 /* Get the value of the argument. */
2114 if (TREE_VALUE (argument) == NULL_TREE
2115 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2116 return ARM_FT_UNKNOWN;
2117
2118 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2119
2120 /* Check it against the list of known arguments. */
2121 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2122 if (streq (arg, ptr->arg))
2123 return ptr->return_value;
2124
2125 /* An unrecognized interrupt type. */
2126 return ARM_FT_UNKNOWN;
2127 }
2128
2129 /* Computes the type of the current function. */
2130
2131 static unsigned long
2132 arm_compute_func_type (void)
2133 {
2134 unsigned long type = ARM_FT_UNKNOWN;
2135 tree a;
2136 tree attr;
2137
2138 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2139
2140 /* Decide if the current function is volatile. Such functions
2141 never return, and many memory cycles can be saved by not storing
2142 register values that will never be needed again. This optimization
2143 was added to speed up context switching in a kernel application. */
2144 if (optimize > 0
2145 && (TREE_NOTHROW (current_function_decl)
2146 || !(flag_unwind_tables
2147 || (flag_exceptions
2148 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2149 && TREE_THIS_VOLATILE (current_function_decl))
2150 type |= ARM_FT_VOLATILE;
2151
2152 if (cfun->static_chain_decl != NULL)
2153 type |= ARM_FT_NESTED;
2154
2155 attr = DECL_ATTRIBUTES (current_function_decl);
2156
2157 a = lookup_attribute ("naked", attr);
2158 if (a != NULL_TREE)
2159 type |= ARM_FT_NAKED;
2160
2161 a = lookup_attribute ("isr", attr);
2162 if (a == NULL_TREE)
2163 a = lookup_attribute ("interrupt", attr);
2164
2165 if (a == NULL_TREE)
2166 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2167 else
2168 type |= arm_isr_value (TREE_VALUE (a));
2169
2170 return type;
2171 }
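/* Illustrative note: the ARM_FT_VOLATILE case above corresponds to a
   function declared noreturn at the source level, e.g.

       void panic (const char *) __attribute__ ((noreturn));

   provided it either cannot throw or no unwind information is
   required.  */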
2172
2173 /* Returns the type of the current function. */
2174
2175 unsigned long
2176 arm_current_func_type (void)
2177 {
2178 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2179 cfun->machine->func_type = arm_compute_func_type ();
2180
2181 return cfun->machine->func_type;
2182 }
2183
2184 bool
2185 arm_allocate_stack_slots_for_args (void)
2186 {
2187 /* Naked functions should not allocate stack slots for arguments. */
2188 return !IS_NAKED (arm_current_func_type ());
2189 }
2190
2191 static bool
2192 arm_warn_func_return (tree decl)
2193 {
2194 /* Naked functions are implemented entirely in assembly, including the
2195 return sequence, so suppress warnings about this. */
2196 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
2197 }
2198
2199 \f
2200 /* Output assembler code for a block containing the constant parts
2201 of a trampoline, leaving space for the variable parts.
2202
2203 On the ARM (if r8 is the static chain regnum, and remembering that
2204 referencing pc adds an offset of 8), the trampoline looks like:
2205 ldr r8, [pc, #0]
2206 ldr pc, [pc]
2207 .word static chain value
2208 .word function's address
2209 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2210
2211 static void
2212 arm_asm_trampoline_template (FILE *f)
2213 {
2214 if (TARGET_ARM)
2215 {
2216 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2217 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2218 }
2219 else if (TARGET_THUMB2)
2220 {
2221 /* The Thumb-2 trampoline is similar to the ARM implementation.
2222 Unlike 16-bit Thumb, we enter the stub in Thumb mode.
2223 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2224 STATIC_CHAIN_REGNUM, PC_REGNUM);
2225 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2226 }
2227 else
2228 {
2229 ASM_OUTPUT_ALIGN (f, 2);
2230 fprintf (f, "\t.code\t16\n");
2231 fprintf (f, ".Ltrampoline_start:\n");
2232 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2233 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2234 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2235 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2236 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2237 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2238 }
2239 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2240 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2241 }
2242
2243 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2244
2245 static void
2246 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2247 {
2248 rtx fnaddr, mem, a_tramp;
2249
2250 emit_block_move (m_tramp, assemble_trampoline_template (),
2251 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2252
2253 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2254 emit_move_insn (mem, chain_value);
2255
2256 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2257 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2258 emit_move_insn (mem, fnaddr);
2259
2260 a_tramp = XEXP (m_tramp, 0);
2261 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2262 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2263 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
2264 }
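/* Illustrative note: for 32-bit (ARM or Thumb-2) entry code the
   template above is 8 bytes long, so the two zero words it emits sit
   at offsets 8 and 12, which is where the static chain value and the
   function address are patched in; the 16-bit Thumb stub is 12 bytes
   long, hence the 12/16 offsets used above.  */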
2265
2266 /* Thumb trampolines should be entered in thumb mode, so set
2267 the bottom bit of the address. */
2268
2269 static rtx
2270 arm_trampoline_adjust_address (rtx addr)
2271 {
2272 if (TARGET_THUMB)
2273 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2274 NULL, 0, OPTAB_LIB_WIDEN);
2275 return addr;
2276 }
2277 \f
2278 /* Return 1 if it is possible to return using a single instruction.
2279 If SIBLING is non-null, this is a test for a return before a sibling
2280 call. SIBLING is the call insn, so we can examine its register usage. */
2281
2282 int
2283 use_return_insn (int iscond, rtx sibling)
2284 {
2285 int regno;
2286 unsigned int func_type;
2287 unsigned long saved_int_regs;
2288 unsigned HOST_WIDE_INT stack_adjust;
2289 arm_stack_offsets *offsets;
2290
2291 /* Never use a return instruction before reload has run. */
2292 if (!reload_completed)
2293 return 0;
2294
2295 func_type = arm_current_func_type ();
2296
2297 /* Naked, volatile and stack alignment functions need special
2298 consideration. */
2299 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2300 return 0;
2301
2302 /* So do interrupt functions that use the frame pointer and Thumb
2303 interrupt functions. */
2304 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2305 return 0;
2306
2307 offsets = arm_get_frame_offsets ();
2308 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2309
2310 /* As do variadic functions. */
2311 if (crtl->args.pretend_args_size
2312 || cfun->machine->uses_anonymous_args
2313 /* Or if the function calls __builtin_eh_return () */
2314 || crtl->calls_eh_return
2315 /* Or if the function calls alloca */
2316 || cfun->calls_alloca
2317 /* Or if there is a stack adjustment. However, if the stack pointer
2318 is saved on the stack, we can use a pre-incrementing stack load. */
2319 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2320 && stack_adjust == 4)))
2321 return 0;
2322
2323 saved_int_regs = offsets->saved_regs_mask;
2324
2325 /* Unfortunately, the insn
2326
2327 ldmib sp, {..., sp, ...}
2328
2329 triggers a bug on most SA-110 based devices, such that the stack
2330 pointer won't be correctly restored if the instruction takes a
2331 page fault. We work around this problem by popping r3 along with
2332 the other registers, since that is never slower than executing
2333 another instruction.
2334
2335 We test for !arm_arch5 here, because code for any architecture
2336 less than this could potentially be run on one of the buggy
2337 chips. */
2338 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2339 {
2340 /* Validate that r3 is a call-clobbered register (always true in
2341 the default abi) ... */
2342 if (!call_used_regs[3])
2343 return 0;
2344
2345 /* ... that it isn't being used for a return value ... */
2346 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2347 return 0;
2348
2349 /* ... or for a tail-call argument ... */
2350 if (sibling)
2351 {
2352 gcc_assert (CALL_P (sibling));
2353
2354 if (find_regno_fusage (sibling, USE, 3))
2355 return 0;
2356 }
2357
2358 /* ... and that there are no call-saved registers in r0-r2
2359 (always true in the default ABI). */
2360 if (saved_int_regs & 0x7)
2361 return 0;
2362 }
2363
2364 /* Can't be done if interworking with Thumb, and any registers have been
2365 stacked. */
2366 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2367 return 0;
2368
2369 /* On StrongARM, conditional returns are expensive if they aren't
2370 taken and multiple registers have been stacked. */
2371 if (iscond && arm_tune_strongarm)
2372 {
2373 /* Conditional return when just the LR is stored is a simple
2374 conditional-load instruction, that's not expensive. */
2375 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2376 return 0;
2377
2378 if (flag_pic
2379 && arm_pic_register != INVALID_REGNUM
2380 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2381 return 0;
2382 }
2383
2384 /* If there are saved registers but the LR isn't saved, then we need
2385 two instructions for the return. */
2386 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2387 return 0;
2388
2389 /* Can't be done if any of the VFP regs are pushed,
2390 since this also requires an insn. */
2391 if (TARGET_HARD_FLOAT && TARGET_VFP)
2392 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2393 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2394 return 0;
2395
2396 if (TARGET_REALLY_IWMMXT)
2397 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2398 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2399 return 0;
2400
2401 return 1;
2402 }
2403
2404 /* Return TRUE if int I is a valid immediate ARM constant. */
2405
2406 int
2407 const_ok_for_arm (HOST_WIDE_INT i)
2408 {
2409 int lowbit;
2410
2411 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2412 be all zero, or all one. */
2413 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2414 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2415 != ((~(unsigned HOST_WIDE_INT) 0)
2416 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2417 return FALSE;
2418
2419 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2420
2421 /* Fast return for 0 and small values. We must do this for zero, since
2422 the code below can't handle that one case. */
2423 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2424 return TRUE;
2425
2426 /* Get the number of trailing zeros. */
2427 lowbit = ffs((int) i) - 1;
2428
2429 /* Only even shifts are allowed in ARM mode so round down to the
2430 nearest even number. */
2431 if (TARGET_ARM)
2432 lowbit &= ~1;
2433
2434 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2435 return TRUE;
2436
2437 if (TARGET_ARM)
2438 {
2439 /* Allow rotated constants in ARM mode. */
2440 if (lowbit <= 4
2441 && ((i & ~0xc000003f) == 0
2442 || (i & ~0xf000000f) == 0
2443 || (i & ~0xfc000003) == 0))
2444 return TRUE;
2445 }
2446 else
2447 {
2448 HOST_WIDE_INT v;
2449
2450 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2451 v = i & 0xff;
2452 v |= v << 16;
2453 if (i == v || i == (v | (v << 8)))
2454 return TRUE;
2455
2456 /* Allow repeated pattern 0xXY00XY00. */
2457 v = i & 0xff00;
2458 v |= v << 16;
2459 if (i == v)
2460 return TRUE;
2461 }
2462
2463 return FALSE;
2464 }
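/* Illustrative examples (not exhaustive): 0xff and 0x3fc (0xff rotated
   by an even amount) are accepted above; 0x00010001 is rejected for
   ARM but accepted for Thumb-2 as a replicated pattern; 0x101 is
   rejected in both modes and has to be synthesized by
   arm_gen_constant, below.  */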
2465
2466 /* Return true if I is a valid constant for the operation CODE. */
2467 int
2468 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2469 {
2470 if (const_ok_for_arm (i))
2471 return 1;
2472
2473 switch (code)
2474 {
2475 case SET:
2476 /* See if we can use movw. */
2477 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2478 return 1;
2479 else
2480 /* Otherwise, try mvn. */
2481 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2482
2483 case PLUS:
2484 /* See if we can use addw or subw. */
2485 if (TARGET_THUMB2
2486 && ((i & 0xfffff000) == 0
2487 || ((-i) & 0xfffff000) == 0))
2488 return 1;
2489 /* else fall through. */
2490
2491 case COMPARE:
2492 case EQ:
2493 case NE:
2494 case GT:
2495 case LE:
2496 case LT:
2497 case GE:
2498 case GEU:
2499 case LTU:
2500 case GTU:
2501 case LEU:
2502 case UNORDERED:
2503 case ORDERED:
2504 case UNEQ:
2505 case UNGE:
2506 case UNLT:
2507 case UNGT:
2508 case UNLE:
2509 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2510
2511 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2512 case XOR:
2513 return 0;
2514
2515 case IOR:
2516 if (TARGET_THUMB2)
2517 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2518 return 0;
2519
2520 case AND:
2521 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2522
2523 default:
2524 gcc_unreachable ();
2525 }
2526 }
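/* Illustrative example: for PLUS, a constant such as -256 is accepted
   even though it is not directly encodable, because the negated value
   256 is; the addition can then be emitted as a subtraction (or, on
   Thumb-2, a subw).  */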
2527
2528 /* Return true if I is a valid di mode constant for the operation CODE. */
2529 int
2530 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
2531 {
2532 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
2533 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
2534 rtx hi = GEN_INT (hi_val);
2535 rtx lo = GEN_INT (lo_val);
2536
2537 if (TARGET_THUMB1)
2538 return 0;
2539
2540 switch (code)
2541 {
2542 case PLUS:
2543 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
2544
2545 default:
2546 return 0;
2547 }
2548 }
2549
2550 /* Emit a sequence of insns to handle a large constant.
2551 CODE is the code of the operation required; it can be any of SET, PLUS,
2552 IOR, AND, XOR, MINUS;
2553 MODE is the mode in which the operation is being performed;
2554 VAL is the integer to operate on;
2555 SOURCE is the other operand (a register, or a null-pointer for SET);
2556 SUBTARGETS means it is safe to create scratch registers if that will
2557 either produce a simpler sequence, or we will want to cse the values.
2558 Return value is the number of insns emitted. */
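/* Illustrative sketch (not lifted from the sources): a SET of 0xff0f,
   which is not a valid ARM immediate, typically costs two
   instructions, e.g. a mov of one rotated 8-bit chunk followed by an
   add/orr of the remainder; where movw is available and profitable, a
   movw/movt pair may be used instead.  */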
2559
2560 /* ??? Tweak this for thumb2. */
2561 int
2562 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2563 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2564 {
2565 rtx cond;
2566
2567 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2568 cond = COND_EXEC_TEST (PATTERN (insn));
2569 else
2570 cond = NULL_RTX;
2571
2572 if (subtargets || code == SET
2573 || (REG_P (target) && REG_P (source)
2574 && REGNO (target) != REGNO (source)))
2575 {
2576 /* After arm_reorg has been called, we can't fix up expensive
2577 constants by pushing them into memory, so we must synthesize
2578 them in-line, regardless of the cost. This is only likely to
2579 be more costly on chips that have load delay slots and we are
2580 compiling without running the scheduler (so no splitting
2581 occurred before the final instruction emission).
2582
2583 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2584 */
2585 if (!after_arm_reorg
2586 && !cond
2587 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2588 1, 0)
2589 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2590 + (code != SET))))
2591 {
2592 if (code == SET)
2593 {
2594 /* Currently SET is the only monadic value for CODE; all
2595 the rest are dyadic. */
2596 if (TARGET_USE_MOVT)
2597 arm_emit_movpair (target, GEN_INT (val));
2598 else
2599 emit_set_insn (target, GEN_INT (val));
2600
2601 return 1;
2602 }
2603 else
2604 {
2605 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2606
2607 if (TARGET_USE_MOVT)
2608 arm_emit_movpair (temp, GEN_INT (val));
2609 else
2610 emit_set_insn (temp, GEN_INT (val));
2611
2612 /* For MINUS, the constant is the minuend (VAL - SOURCE), since we never
2613 have subtraction of a constant. */
2614 if (code == MINUS)
2615 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2616 else
2617 emit_set_insn (target,
2618 gen_rtx_fmt_ee (code, mode, source, temp));
2619 return 2;
2620 }
2621 }
2622 }
2623
2624 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2625 1);
2626 }
2627
2628 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
2629 ARM/THUMB2 immediates and add up to VAL.
2630 The function return value gives the number of insns required. */
2631 static int
2632 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2633 struct four_ints *return_sequence)
2634 {
2635 int best_consecutive_zeros = 0;
2636 int i;
2637 int best_start = 0;
2638 int insns1, insns2;
2639 struct four_ints tmp_sequence;
2640
2641 /* If we aren't targeting ARM, the best place to start is always at
2642 the bottom; otherwise, look more closely. */
2643 if (TARGET_ARM)
2644 {
2645 for (i = 0; i < 32; i += 2)
2646 {
2647 int consecutive_zeros = 0;
2648
2649 if (!(val & (3 << i)))
2650 {
2651 while ((i < 32) && !(val & (3 << i)))
2652 {
2653 consecutive_zeros += 2;
2654 i += 2;
2655 }
2656 if (consecutive_zeros > best_consecutive_zeros)
2657 {
2658 best_consecutive_zeros = consecutive_zeros;
2659 best_start = i - consecutive_zeros;
2660 }
2661 i -= 2;
2662 }
2663 }
2664 }
2665
2666 /* So long as it won't require any more insns to do so, it's
2667 desirable to emit a small constant (in bits 0...9) in the last
2668 insn. This way there is more chance that it can be combined with
2669 a later addressing insn to form a pre-indexed load or store
2670 operation. Consider:
2671
2672 *((volatile int *)0xe0000100) = 1;
2673 *((volatile int *)0xe0000110) = 2;
2674
2675 We want this to wind up as:
2676
2677 mov rA, #0xe0000000
2678 mov rB, #1
2679 str rB, [rA, #0x100]
2680 mov rB, #2
2681 str rB, [rA, #0x110]
2682
2683 rather than having to synthesize both large constants from scratch.
2684
2685 Therefore, we calculate how many insns would be required to emit
2686 the constant starting from `best_start', and also starting from
2687 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2688 yield a shorter sequence, we may as well use zero. */
2689 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
2690 if (best_start != 0
2691 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
2692 {
2693 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
2694 if (insns2 <= insns1)
2695 {
2696 *return_sequence = tmp_sequence;
2697 insns1 = insns2;
2698 }
2699 }
2700
2701 return insns1;
2702 }
2703
2704 /* As for optimal_immediate_sequence, but starting at bit-position I. */
2705 static int
2706 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2707 struct four_ints *return_sequence, int i)
2708 {
2709 int remainder = val & 0xffffffff;
2710 int insns = 0;
2711
2712 /* Try and find a way of doing the job in either two or three
2713 instructions.
2714
2715 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2716 location. We start at position I. This may be the MSB, or
2717 optimal_immediate_sequence may have positioned it at the largest block
2718 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2719 wrapping around to the top of the word when we drop off the bottom.
2720 In the worst case this code should produce no more than four insns.
2721
2722 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
2723 constants, shifted to any arbitrary location. We should always start
2724 at the MSB. */
2725 do
2726 {
2727 int end;
2728 unsigned int b1, b2, b3, b4;
2729 unsigned HOST_WIDE_INT result;
2730 int loc;
2731
2732 gcc_assert (insns < 4);
2733
2734 if (i <= 0)
2735 i += 32;
2736
2737 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
2738 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2739 {
2740 loc = i;
2741 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2742 /* We can use addw/subw for the last 12 bits. */
2743 result = remainder;
2744 else
2745 {
2746 /* Use an 8-bit shifted/rotated immediate. */
2747 end = i - 8;
2748 if (end < 0)
2749 end += 32;
2750 result = remainder & ((0x0ff << end)
2751 | ((i < end) ? (0xff >> (32 - end))
2752 : 0));
2753 i -= 8;
2754 }
2755 }
2756 else
2757 {
2758 /* Arm allows rotates by a multiple of two. Thumb-2 allows
2759 arbitrary shifts. */
2760 i -= TARGET_ARM ? 2 : 1;
2761 continue;
2762 }
2763
2764 /* Next, see if we can do a better job with a thumb2 replicated
2765 constant.
2766
2767 We do it this way around to catch the cases like 0x01F001E0 where
2768 two 8-bit immediates would work, but a replicated constant would
2769 make it worse.
2770
2771 TODO: 16-bit constants that don't clear all the bits, but still win.
2772 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
2773 if (TARGET_THUMB2)
2774 {
2775 b1 = (remainder & 0xff000000) >> 24;
2776 b2 = (remainder & 0x00ff0000) >> 16;
2777 b3 = (remainder & 0x0000ff00) >> 8;
2778 b4 = remainder & 0xff;
2779
2780 if (loc > 24)
2781 {
2782 /* The 8-bit immediate already found clears b1 (and maybe b2),
2783 but must leave b3 and b4 alone. */
2784
2785 /* First try to find a 32-bit replicated constant that clears
2786 almost everything. We can assume that we can't do it in one,
2787 or else we wouldn't be here. */
2788 unsigned int tmp = b1 & b2 & b3 & b4;
2789 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2790 + (tmp << 24);
2791 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2792 + (tmp == b3) + (tmp == b4);
2793 if (tmp
2794 && (matching_bytes >= 3
2795 || (matching_bytes == 2
2796 && const_ok_for_op (remainder & ~tmp2, code))))
2797 {
2798 /* At least 3 of the bytes match, and the fourth has at
2799 least as many bits set, or two of the bytes match
2800 and it will only require one more insn to finish. */
2801 result = tmp2;
2802 i = tmp != b1 ? 32
2803 : tmp != b2 ? 24
2804 : tmp != b3 ? 16
2805 : 8;
2806 }
2807
2808 /* Second, try to find a 16-bit replicated constant that can
2809 leave three of the bytes clear. If b2 or b4 is already
2810 zero, then we can. If the 8-bit from above would not
2811 clear b2 anyway, then we still win. */
2812 else if (b1 == b3 && (!b2 || !b4
2813 || (remainder & 0x00ff0000 & ~result)))
2814 {
2815 result = remainder & 0xff00ff00;
2816 i = 24;
2817 }
2818 }
2819 else if (loc > 16)
2820 {
2821 /* The 8-bit immediate already found clears b2 (and maybe b3)
2822 and we don't get here unless b1 is already clear, but it will
2823 leave b4 unchanged. */
2824
2825 /* If we can clear b2 and b4 at once, then we win, since the
2826 8-bits couldn't possibly reach that far. */
2827 if (b2 == b4)
2828 {
2829 result = remainder & 0x00ff00ff;
2830 i = 16;
2831 }
2832 }
2833 }
2834
2835 return_sequence->i[insns++] = result;
2836 remainder &= ~result;
2837
2838 if (code == SET || code == MINUS)
2839 code = PLUS;
2840 }
2841 while (remainder);
2842
2843 return insns;
2844 }
2845
2846 /* Emit an instruction with the indicated PATTERN. If COND is
2847 non-NULL, conditionalize the execution of the instruction on COND
2848 being true. */
2849
2850 static void
2851 emit_constant_insn (rtx cond, rtx pattern)
2852 {
2853 if (cond)
2854 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2855 emit_insn (pattern);
2856 }
2857
2858 /* As above, but extra parameter GENERATE which, if clear, suppresses
2859 RTL generation. */
2860
2861 static int
2862 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2863 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2864 int generate)
2865 {
2866 int can_invert = 0;
2867 int can_negate = 0;
2868 int final_invert = 0;
2869 int i;
2870 int set_sign_bit_copies = 0;
2871 int clear_sign_bit_copies = 0;
2872 int clear_zero_bit_copies = 0;
2873 int set_zero_bit_copies = 0;
2874 int insns = 0, neg_insns, inv_insns;
2875 unsigned HOST_WIDE_INT temp1, temp2;
2876 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2877 struct four_ints *immediates;
2878 struct four_ints pos_immediates, neg_immediates, inv_immediates;
2879
2880 /* Find out which operations are safe for a given CODE. Also do a quick
2881 check for degenerate cases; these can occur when DImode operations
2882 are split. */
2883 switch (code)
2884 {
2885 case SET:
2886 can_invert = 1;
2887 break;
2888
2889 case PLUS:
2890 can_negate = 1;
2891 break;
2892
2893 case IOR:
2894 if (remainder == 0xffffffff)
2895 {
2896 if (generate)
2897 emit_constant_insn (cond,
2898 gen_rtx_SET (VOIDmode, target,
2899 GEN_INT (ARM_SIGN_EXTEND (val))));
2900 return 1;
2901 }
2902
2903 if (remainder == 0)
2904 {
2905 if (reload_completed && rtx_equal_p (target, source))
2906 return 0;
2907
2908 if (generate)
2909 emit_constant_insn (cond,
2910 gen_rtx_SET (VOIDmode, target, source));
2911 return 1;
2912 }
2913 break;
2914
2915 case AND:
2916 if (remainder == 0)
2917 {
2918 if (generate)
2919 emit_constant_insn (cond,
2920 gen_rtx_SET (VOIDmode, target, const0_rtx));
2921 return 1;
2922 }
2923 if (remainder == 0xffffffff)
2924 {
2925 if (reload_completed && rtx_equal_p (target, source))
2926 return 0;
2927 if (generate)
2928 emit_constant_insn (cond,
2929 gen_rtx_SET (VOIDmode, target, source));
2930 return 1;
2931 }
2932 can_invert = 1;
2933 break;
2934
2935 case XOR:
2936 if (remainder == 0)
2937 {
2938 if (reload_completed && rtx_equal_p (target, source))
2939 return 0;
2940 if (generate)
2941 emit_constant_insn (cond,
2942 gen_rtx_SET (VOIDmode, target, source));
2943 return 1;
2944 }
2945
2946 if (remainder == 0xffffffff)
2947 {
2948 if (generate)
2949 emit_constant_insn (cond,
2950 gen_rtx_SET (VOIDmode, target,
2951 gen_rtx_NOT (mode, source)));
2952 return 1;
2953 }
2954 final_invert = 1;
2955 break;
2956
2957 case MINUS:
2958 /* We treat MINUS as (val - source), since (source - val) is always
2959 passed as (source + (-val)). */
2960 if (remainder == 0)
2961 {
2962 if (generate)
2963 emit_constant_insn (cond,
2964 gen_rtx_SET (VOIDmode, target,
2965 gen_rtx_NEG (mode, source)));
2966 return 1;
2967 }
2968 if (const_ok_for_arm (val))
2969 {
2970 if (generate)
2971 emit_constant_insn (cond,
2972 gen_rtx_SET (VOIDmode, target,
2973 gen_rtx_MINUS (mode, GEN_INT (val),
2974 source)));
2975 return 1;
2976 }
2977
2978 break;
2979
2980 default:
2981 gcc_unreachable ();
2982 }
2983
2984 /* If we can do it in one insn get out quickly. */
2985 if (const_ok_for_op (val, code))
2986 {
2987 if (generate)
2988 emit_constant_insn (cond,
2989 gen_rtx_SET (VOIDmode, target,
2990 (source
2991 ? gen_rtx_fmt_ee (code, mode, source,
2992 GEN_INT (val))
2993 : GEN_INT (val))));
2994 return 1;
2995 }
2996
2997 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
2998 insn. */
2999 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
3000 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
3001 {
3002 if (generate)
3003 {
3004 if (mode == SImode && i == 16)
3005 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3006 smaller insn. */
3007 emit_constant_insn (cond,
3008 gen_zero_extendhisi2
3009 (target, gen_lowpart (HImode, source)));
3010 else
3011 /* Extz only supports SImode, but we can coerce the operands
3012 into that mode. */
3013 emit_constant_insn (cond,
3014 gen_extzv_t2 (gen_lowpart (SImode, target),
3015 gen_lowpart (SImode, source),
3016 GEN_INT (i), const0_rtx));
3017 }
3018
3019 return 1;
3020 }
3021
3022 /* Calculate a few attributes that may be useful for specific
3023 optimizations. */
3024 /* Count number of leading zeros. */
3025 for (i = 31; i >= 0; i--)
3026 {
3027 if ((remainder & (1 << i)) == 0)
3028 clear_sign_bit_copies++;
3029 else
3030 break;
3031 }
3032
3033 /* Count number of leading 1's. */
3034 for (i = 31; i >= 0; i--)
3035 {
3036 if ((remainder & (1 << i)) != 0)
3037 set_sign_bit_copies++;
3038 else
3039 break;
3040 }
3041
3042 /* Count number of trailing zeros. */
3043 for (i = 0; i <= 31; i++)
3044 {
3045 if ((remainder & (1 << i)) == 0)
3046 clear_zero_bit_copies++;
3047 else
3048 break;
3049 }
3050
3051 /* Count number of trailing 1's. */
3052 for (i = 0; i <= 31; i++)
3053 {
3054 if ((remainder & (1 << i)) != 0)
3055 set_zero_bit_copies++;
3056 else
3057 break;
3058 }
3059
3060 switch (code)
3061 {
3062 case SET:
3063 /* See if we can do this by sign-extending a constant that is known
3064 to be negative. This is a good way of doing it, since the shift
3065 may well merge into a subsequent insn. */
3066 if (set_sign_bit_copies > 1)
3067 {
3068 if (const_ok_for_arm
3069 (temp1 = ARM_SIGN_EXTEND (remainder
3070 << (set_sign_bit_copies - 1))))
3071 {
3072 if (generate)
3073 {
3074 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3075 emit_constant_insn (cond,
3076 gen_rtx_SET (VOIDmode, new_src,
3077 GEN_INT (temp1)));
3078 emit_constant_insn (cond,
3079 gen_ashrsi3 (target, new_src,
3080 GEN_INT (set_sign_bit_copies - 1)));
3081 }
3082 return 2;
3083 }
3084 /* For an inverted constant, we will need to set the low bits;
3085 these will be shifted out of harm's way. */
3086 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3087 if (const_ok_for_arm (~temp1))
3088 {
3089 if (generate)
3090 {
3091 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3092 emit_constant_insn (cond,
3093 gen_rtx_SET (VOIDmode, new_src,
3094 GEN_INT (temp1)));
3095 emit_constant_insn (cond,
3096 gen_ashrsi3 (target, new_src,
3097 GEN_INT (set_sign_bit_copies - 1)));
3098 }
3099 return 2;
3100 }
3101 }
3102
3103 /* See if we can calculate the value as the difference between two
3104 valid immediates. */
3105 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3106 {
3107 int topshift = clear_sign_bit_copies & ~1;
3108
3109 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3110 & (0xff000000 >> topshift));
3111
3112 /* If temp1 is zero, then that means the 9 most significant
3113 bits of remainder were 1 and we've caused it to overflow.
3114 When topshift is 0 we don't need to do anything since we
3115 can borrow from 'bit 32'. */
3116 if (temp1 == 0 && topshift != 0)
3117 temp1 = 0x80000000 >> (topshift - 1);
3118
3119 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3120
3121 if (const_ok_for_arm (temp2))
3122 {
3123 if (generate)
3124 {
3125 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3126 emit_constant_insn (cond,
3127 gen_rtx_SET (VOIDmode, new_src,
3128 GEN_INT (temp1)));
3129 emit_constant_insn (cond,
3130 gen_addsi3 (target, new_src,
3131 GEN_INT (-temp2)));
3132 }
3133
3134 return 2;
3135 }
3136 }
3137
3138 /* See if we can generate this by setting the bottom (or the top)
3139 16 bits, and then shifting these into the other half of the
3140 word. We only look for the simplest cases, to do more would cost
3141 too much. Be careful, however, not to generate this when the
3142 alternative would take fewer insns. */
3143 if (val & 0xffff0000)
3144 {
3145 temp1 = remainder & 0xffff0000;
3146 temp2 = remainder & 0x0000ffff;
3147
3148 /* Overlaps outside this range are best done using other methods. */
3149 for (i = 9; i < 24; i++)
3150 {
3151 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3152 && !const_ok_for_arm (temp2))
3153 {
3154 rtx new_src = (subtargets
3155 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3156 : target);
3157 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3158 source, subtargets, generate);
3159 source = new_src;
3160 if (generate)
3161 emit_constant_insn
3162 (cond,
3163 gen_rtx_SET
3164 (VOIDmode, target,
3165 gen_rtx_IOR (mode,
3166 gen_rtx_ASHIFT (mode, source,
3167 GEN_INT (i)),
3168 source)));
3169 return insns + 1;
3170 }
3171 }
3172
3173 /* Don't duplicate cases already considered. */
3174 for (i = 17; i < 24; i++)
3175 {
3176 if (((temp1 | (temp1 >> i)) == remainder)
3177 && !const_ok_for_arm (temp1))
3178 {
3179 rtx new_src = (subtargets
3180 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3181 : target);
3182 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3183 source, subtargets, generate);
3184 source = new_src;
3185 if (generate)
3186 emit_constant_insn
3187 (cond,
3188 gen_rtx_SET (VOIDmode, target,
3189 gen_rtx_IOR
3190 (mode,
3191 gen_rtx_LSHIFTRT (mode, source,
3192 GEN_INT (i)),
3193 source)));
3194 return insns + 1;
3195 }
3196 }
3197 }
3198 break;
3199
3200 case IOR:
3201 case XOR:
3202 /* If we have IOR or XOR, and the constant can be loaded in a
3203 single instruction, and we can find a temporary to put it in,
3204 then this can be done in two instructions instead of 3-4. */
3205 if (subtargets
3206 /* TARGET can't be NULL if SUBTARGETS is 0 */
3207 || (reload_completed && !reg_mentioned_p (target, source)))
3208 {
3209 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3210 {
3211 if (generate)
3212 {
3213 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3214
3215 emit_constant_insn (cond,
3216 gen_rtx_SET (VOIDmode, sub,
3217 GEN_INT (val)));
3218 emit_constant_insn (cond,
3219 gen_rtx_SET (VOIDmode, target,
3220 gen_rtx_fmt_ee (code, mode,
3221 source, sub)));
3222 }
3223 return 2;
3224 }
3225 }
3226
3227 if (code == XOR)
3228 break;
3229
3230 /* Convert
3231 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
3232 and the remainder 0s, e.g. 0xfff00000) to
3233 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3234 
3235 This can be done in 2 instructions by using shifts with mov or mvn.
3236 E.g. for
3237 x = x | 0xfff00000;
3238 we generate:
3239 mvn r0, r0, asl #12
3240 mvn r0, r0, lsr #12 */
3241 if (set_sign_bit_copies > 8
3242 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3243 {
3244 if (generate)
3245 {
3246 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3247 rtx shift = GEN_INT (set_sign_bit_copies);
3248
3249 emit_constant_insn
3250 (cond,
3251 gen_rtx_SET (VOIDmode, sub,
3252 gen_rtx_NOT (mode,
3253 gen_rtx_ASHIFT (mode,
3254 source,
3255 shift))));
3256 emit_constant_insn
3257 (cond,
3258 gen_rtx_SET (VOIDmode, target,
3259 gen_rtx_NOT (mode,
3260 gen_rtx_LSHIFTRT (mode, sub,
3261 shift))));
3262 }
3263 return 2;
3264 }
3265
3266 /* Convert
3267 x = y | constant (which has set_zero_bit_copies number of trailing ones)
3268 to
3269 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3270 
3271 E.g. for r0 = r0 | 0xfff we generate
3272 mvn r0, r0, lsr #12
3273 mvn r0, r0, asl #12
3274
3275 */
3276 if (set_zero_bit_copies > 8
3277 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3278 {
3279 if (generate)
3280 {
3281 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3282 rtx shift = GEN_INT (set_zero_bit_copies);
3283
3284 emit_constant_insn
3285 (cond,
3286 gen_rtx_SET (VOIDmode, sub,
3287 gen_rtx_NOT (mode,
3288 gen_rtx_LSHIFTRT (mode,
3289 source,
3290 shift))));
3291 emit_constant_insn
3292 (cond,
3293 gen_rtx_SET (VOIDmode, target,
3294 gen_rtx_NOT (mode,
3295 gen_rtx_ASHIFT (mode, sub,
3296 shift))));
3297 }
3298 return 2;
3299 }
3300
3301 /* This will never be reached for Thumb2 because orn is a valid
3302 instruction. This is for Thumb1 and the ARM 32 bit cases.
3303
3304 x = y | constant (such that ~constant is a valid constant)
3305 Transform this to
3306 x = ~(~y & ~constant).
3307 */
3308 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3309 {
3310 if (generate)
3311 {
3312 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3313 emit_constant_insn (cond,
3314 gen_rtx_SET (VOIDmode, sub,
3315 gen_rtx_NOT (mode, source)));
3316 source = sub;
3317 if (subtargets)
3318 sub = gen_reg_rtx (mode);
3319 emit_constant_insn (cond,
3320 gen_rtx_SET (VOIDmode, sub,
3321 gen_rtx_AND (mode, source,
3322 GEN_INT (temp1))));
3323 emit_constant_insn (cond,
3324 gen_rtx_SET (VOIDmode, target,
3325 gen_rtx_NOT (mode, sub)));
3326 }
3327 return 3;
3328 }
3329 break;
3330
3331 case AND:
3332 /* See if two shifts will do 2 or more insn's worth of work. */
3333 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3334 {
3335 HOST_WIDE_INT shift_mask = ((0xffffffff
3336 << (32 - clear_sign_bit_copies))
3337 & 0xffffffff);
3338
3339 if ((remainder | shift_mask) != 0xffffffff)
3340 {
3341 if (generate)
3342 {
3343 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3344 insns = arm_gen_constant (AND, mode, cond,
3345 remainder | shift_mask,
3346 new_src, source, subtargets, 1);
3347 source = new_src;
3348 }
3349 else
3350 {
3351 rtx targ = subtargets ? NULL_RTX : target;
3352 insns = arm_gen_constant (AND, mode, cond,
3353 remainder | shift_mask,
3354 targ, source, subtargets, 0);
3355 }
3356 }
3357
3358 if (generate)
3359 {
3360 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3361 rtx shift = GEN_INT (clear_sign_bit_copies);
3362
3363 emit_insn (gen_ashlsi3 (new_src, source, shift));
3364 emit_insn (gen_lshrsi3 (target, new_src, shift));
3365 }
3366
3367 return insns + 2;
3368 }
3369
3370 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3371 {
3372 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3373
3374 if ((remainder | shift_mask) != 0xffffffff)
3375 {
3376 if (generate)
3377 {
3378 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3379
3380 insns = arm_gen_constant (AND, mode, cond,
3381 remainder | shift_mask,
3382 new_src, source, subtargets, 1);
3383 source = new_src;
3384 }
3385 else
3386 {
3387 rtx targ = subtargets ? NULL_RTX : target;
3388
3389 insns = arm_gen_constant (AND, mode, cond,
3390 remainder | shift_mask,
3391 targ, source, subtargets, 0);
3392 }
3393 }
3394
3395 if (generate)
3396 {
3397 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3398 rtx shift = GEN_INT (clear_zero_bit_copies);
3399
3400 emit_insn (gen_lshrsi3 (new_src, source, shift));
3401 emit_insn (gen_ashlsi3 (target, new_src, shift));
3402 }
3403
3404 return insns + 2;
3405 }
3406
3407 break;
3408
3409 default:
3410 break;
3411 }
3412
3413 /* Calculate what the instruction sequence would be if we generated the
3414 constant normally, negated, or inverted. */
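/* As an illustration: for AND with 0xffffff00 the positive form is never
   costed (AND cannot be split), but the inverted constant is 0xff, so the
   chosen sequence is a single BIC with #255.  */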
3415 if (code == AND)
3416 /* AND cannot be split into multiple insns, so invert and use BIC. */
3417 insns = 99;
3418 else
3419 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3420
3421 if (can_negate)
3422 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3423 &neg_immediates);
3424 else
3425 neg_insns = 99;
3426
3427 if (can_invert || final_invert)
3428 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3429 &inv_immediates);
3430 else
3431 inv_insns = 99;
3432
3433 immediates = &pos_immediates;
3434
3435 /* Is the negated immediate sequence more efficient? */
3436 if (neg_insns < insns && neg_insns <= inv_insns)
3437 {
3438 insns = neg_insns;
3439 immediates = &neg_immediates;
3440 }
3441 else
3442 can_negate = 0;
3443
3444 /* Is the inverted immediate sequence more efficient?
3445 We must allow for an extra NOT instruction for XOR operations, although
3446 there is some chance that the final 'mvn' will get optimized later. */
3447 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3448 {
3449 insns = inv_insns;
3450 immediates = &inv_immediates;
3451 }
3452 else
3453 {
3454 can_invert = 0;
3455 final_invert = 0;
3456 }
3457
3458 /* Now output the chosen sequence as instructions. */
3459 if (generate)
3460 {
3461 for (i = 0; i < insns; i++)
3462 {
3463 rtx new_src, temp1_rtx;
3464
3465 temp1 = immediates->i[i];
3466
3467 if (code == SET || code == MINUS)
3468 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3469 else if ((final_invert || i < (insns - 1)) && subtargets)
3470 new_src = gen_reg_rtx (mode);
3471 else
3472 new_src = target;
3473
3474 if (can_invert)
3475 temp1 = ~temp1;
3476 else if (can_negate)
3477 temp1 = -temp1;
3478
3479 temp1 = trunc_int_for_mode (temp1, mode);
3480 temp1_rtx = GEN_INT (temp1);
3481
3482 if (code == SET)
3483 ;
3484 else if (code == MINUS)
3485 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3486 else
3487 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3488
3489 emit_constant_insn (cond,
3490 gen_rtx_SET (VOIDmode, new_src,
3491 temp1_rtx));
3492 source = new_src;
3493
3494 if (code == SET)
3495 {
3496 can_negate = can_invert;
3497 can_invert = 0;
3498 code = PLUS;
3499 }
3500 else if (code == MINUS)
3501 code = PLUS;
3502 }
3503 }
3504
3505 if (final_invert)
3506 {
3507 if (generate)
3508 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3509 gen_rtx_NOT (mode, source)));
3510 insns++;
3511 }
3512
3513 return insns;
3514 }
3515
3516 /* Canonicalize a comparison so that we are more likely to recognize it.
3517 This can be done for a few constant compares, where we can make the
3518 immediate value easier to load. */
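/* For instance, in an SImode comparison 511 is not a valid ARM immediate
   but 512 is, so (GT x 511) is rewritten below as (GE x 512), avoiding a
   constant-loading sequence.  */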
3519
3520 enum rtx_code
3521 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3522 {
3523 enum machine_mode mode;
3524 unsigned HOST_WIDE_INT i, maxval;
3525
3526 mode = GET_MODE (*op0);
3527 if (mode == VOIDmode)
3528 mode = GET_MODE (*op1);
3529
3530 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3531
3532 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3533 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3534 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3535 for GTU/LEU in Thumb mode. */
3536 if (mode == DImode)
3537 {
3538 rtx tem;
3539
3540 if (code == GT || code == LE
3541 || (!TARGET_ARM && (code == GTU || code == LEU)))
3542 {
3543 /* Missing comparison. First try to use an available
3544 comparison. */
3545 if (CONST_INT_P (*op1))
3546 {
3547 i = INTVAL (*op1);
3548 switch (code)
3549 {
3550 case GT:
3551 case LE:
3552 if (i != maxval
3553 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3554 {
3555 *op1 = GEN_INT (i + 1);
3556 return code == GT ? GE : LT;
3557 }
3558 break;
3559 case GTU:
3560 case LEU:
3561 if (i != ~((unsigned HOST_WIDE_INT) 0)
3562 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3563 {
3564 *op1 = GEN_INT (i + 1);
3565 return code == GTU ? GEU : LTU;
3566 }
3567 break;
3568 default:
3569 gcc_unreachable ();
3570 }
3571 }
3572
3573 /* If that did not work, reverse the condition. */
3574 tem = *op0;
3575 *op0 = *op1;
3576 *op1 = tem;
3577 return swap_condition (code);
3578 }
3579
3580 return code;
3581 }
3582
3583 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3584 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3585 to facilitate possible combining with a cmp into 'ands'. */
3586 if (mode == SImode
3587 && GET_CODE (*op0) == ZERO_EXTEND
3588 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3589 && GET_MODE (XEXP (*op0, 0)) == QImode
3590 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3591 && subreg_lowpart_p (XEXP (*op0, 0))
3592 && *op1 == const0_rtx)
3593 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
3594 GEN_INT (255));
3595
3596 /* Comparisons smaller than DImode. Only adjust comparisons against
3597 an out-of-range constant. */
3598 if (!CONST_INT_P (*op1)
3599 || const_ok_for_arm (INTVAL (*op1))
3600 || const_ok_for_arm (- INTVAL (*op1)))
3601 return code;
3602
3603 i = INTVAL (*op1);
3604
3605 switch (code)
3606 {
3607 case EQ:
3608 case NE:
3609 return code;
3610
3611 case GT:
3612 case LE:
3613 if (i != maxval
3614 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3615 {
3616 *op1 = GEN_INT (i + 1);
3617 return code == GT ? GE : LT;
3618 }
3619 break;
3620
3621 case GE:
3622 case LT:
3623 if (i != ~maxval
3624 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3625 {
3626 *op1 = GEN_INT (i - 1);
3627 return code == GE ? GT : LE;
3628 }
3629 break;
3630
3631 case GTU:
3632 case LEU:
3633 if (i != ~((unsigned HOST_WIDE_INT) 0)
3634 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3635 {
3636 *op1 = GEN_INT (i + 1);
3637 return code == GTU ? GEU : LTU;
3638 }
3639 break;
3640
3641 case GEU:
3642 case LTU:
3643 if (i != 0
3644 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3645 {
3646 *op1 = GEN_INT (i - 1);
3647 return code == GEU ? GTU : LEU;
3648 }
3649 break;
3650
3651 default:
3652 gcc_unreachable ();
3653 }
3654
3655 return code;
3656 }
3657
3658
3659 /* Define how to find the value returned by a function. */
3660
3661 static rtx
3662 arm_function_value(const_tree type, const_tree func,
3663 bool outgoing ATTRIBUTE_UNUSED)
3664 {
3665 enum machine_mode mode;
3666 int unsignedp ATTRIBUTE_UNUSED;
3667 rtx r ATTRIBUTE_UNUSED;
3668
3669 mode = TYPE_MODE (type);
3670
3671 if (TARGET_AAPCS_BASED)
3672 return aapcs_allocate_return_reg (mode, type, func);
3673
3674 /* Promote integer types. */
3675 if (INTEGRAL_TYPE_P (type))
3676 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3677
3678 /* Promote small structs returned in a register to full-word size
3679 for big-endian AAPCS. */
3680 if (arm_return_in_msb (type))
3681 {
3682 HOST_WIDE_INT size = int_size_in_bytes (type);
3683 if (size % UNITS_PER_WORD != 0)
3684 {
3685 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3686 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3687 }
3688 }
3689
3690 return arm_libcall_value_1 (mode);
3691 }
3692
3693 static int
3694 libcall_eq (const void *p1, const void *p2)
3695 {
3696 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3697 }
3698
3699 static hashval_t
3700 libcall_hash (const void *p1)
3701 {
3702 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3703 }
3704
3705 static void
3706 add_libcall (htab_t htab, rtx libcall)
3707 {
3708 *htab_find_slot (htab, libcall, INSERT) = libcall;
3709 }
3710
3711 static bool
3712 arm_libcall_uses_aapcs_base (const_rtx libcall)
3713 {
3714 static bool init_done = false;
3715 static htab_t libcall_htab;
3716
3717 if (!init_done)
3718 {
3719 init_done = true;
3720
3721 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3722 NULL);
3723 add_libcall (libcall_htab,
3724 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3725 add_libcall (libcall_htab,
3726 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3727 add_libcall (libcall_htab,
3728 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3729 add_libcall (libcall_htab,
3730 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3731
3732 add_libcall (libcall_htab,
3733 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3734 add_libcall (libcall_htab,
3735 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3736 add_libcall (libcall_htab,
3737 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3738 add_libcall (libcall_htab,
3739 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3740
3741 add_libcall (libcall_htab,
3742 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3743 add_libcall (libcall_htab,
3744 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3745 add_libcall (libcall_htab,
3746 convert_optab_libfunc (sfix_optab, SImode, DFmode));
3747 add_libcall (libcall_htab,
3748 convert_optab_libfunc (ufix_optab, SImode, DFmode));
3749 add_libcall (libcall_htab,
3750 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3751 add_libcall (libcall_htab,
3752 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3753 add_libcall (libcall_htab,
3754 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3755 add_libcall (libcall_htab,
3756 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3757
3758 /* Values from double-precision helper functions are returned in core
3759 registers if the selected core only supports single-precision
3760 arithmetic, even if we are using the hard-float ABI. The same is
3761 true for single-precision helpers, but we will never be using the
3762 hard-float ABI on a CPU which doesn't support single-precision
3763 operations in hardware. */
3764 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3765 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3766 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3767 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3768 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3769 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3770 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3771 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3772 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3773 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3774 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3775 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3776 SFmode));
3777 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3778 DFmode));
3779 }
3780
3781 return libcall && htab_find (libcall_htab, libcall) != NULL;
3782 }
3783
3784 static rtx
3785 arm_libcall_value_1 (enum machine_mode mode)
3786 {
3787 if (TARGET_AAPCS_BASED)
3788 return aapcs_libcall_value (mode);
3789 else if (TARGET_IWMMXT_ABI
3790 && arm_vector_mode_supported_p (mode))
3791 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
3792 else
3793 return gen_rtx_REG (mode, ARG_REGISTER (1));
3794 }
3795
3796 /* Define how to find the value returned by a library function
3797 assuming the value has mode MODE. */
3798
3799 static rtx
3800 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3801 {
3802 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3803 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3804 {
3805 /* The following libcalls return their result in integer registers,
3806 even though they return a floating point value. */
3807 if (arm_libcall_uses_aapcs_base (libcall))
3808 return gen_rtx_REG (mode, ARG_REGISTER(1));
3809
3810 }
3811
3812 return arm_libcall_value_1 (mode);
3813 }
3814
3815 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
3816
3817 static bool
3818 arm_function_value_regno_p (const unsigned int regno)
3819 {
3820 if (regno == ARG_REGISTER (1)
3821 || (TARGET_32BIT
3822 && TARGET_AAPCS_BASED
3823 && TARGET_VFP
3824 && TARGET_HARD_FLOAT
3825 && regno == FIRST_VFP_REGNUM)
3826 || (TARGET_IWMMXT_ABI
3827 && regno == FIRST_IWMMXT_REGNUM))
3828 return true;
3829
3830 return false;
3831 }
3832
3833 /* Determine the amount of memory needed to store the possible return
3834 registers of an untyped call. */
3835 int
3836 arm_apply_result_size (void)
3837 {
3838 int size = 16;
3839
3840 if (TARGET_32BIT)
3841 {
3842 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
3843 size += 32;
3844 if (TARGET_IWMMXT_ABI)
3845 size += 8;
3846 }
3847
3848 return size;
3849 }
3850
3851 /* Decide whether TYPE should be returned in memory (true)
3852 or in a register (false). FNTYPE is the type of the function making
3853 the call. */
3854 static bool
3855 arm_return_in_memory (const_tree type, const_tree fntype)
3856 {
3857 HOST_WIDE_INT size;
3858
3859 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3860
3861 if (TARGET_AAPCS_BASED)
3862 {
3863 /* Simple, non-aggregate types (i.e. not including vectors and
3864 complex) are always returned in a register (or registers).
3865 We don't care about which register here, so we can short-cut
3866 some of the detail. */
3867 if (!AGGREGATE_TYPE_P (type)
3868 && TREE_CODE (type) != VECTOR_TYPE
3869 && TREE_CODE (type) != COMPLEX_TYPE)
3870 return false;
3871
3872 /* Any return value that is no larger than one word can be
3873 returned in r0. */
3874 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3875 return false;
3876
3877 /* Check any available co-processors to see if they accept the
3878 type as a register candidate (VFP, for example, can return
3879 some aggregates in consecutive registers). These aren't
3880 available if the call is variadic. */
3881 if (aapcs_select_return_coproc (type, fntype) >= 0)
3882 return false;
3883
3884 /* Vector values should be returned using ARM registers, not
3885 memory (unless they're over 16 bytes, which will break since
3886 we only have four call-clobbered registers to play with). */
3887 if (TREE_CODE (type) == VECTOR_TYPE)
3888 return (size < 0 || size > (4 * UNITS_PER_WORD));
3889
3890 /* The rest go in memory. */
3891 return true;
3892 }
3893
3894 if (TREE_CODE (type) == VECTOR_TYPE)
3895 return (size < 0 || size > (4 * UNITS_PER_WORD));
3896
3897 if (!AGGREGATE_TYPE_P (type)
3898 && (TREE_CODE (type) != VECTOR_TYPE))
3899 /* All simple types are returned in registers. */
3900 return false;
3901
3902 if (arm_abi != ARM_ABI_APCS)
3903 {
3904 /* ATPCS and later return aggregate types in memory only if they are
3905 larger than a word (or are variable size). */
3906 return (size < 0 || size > UNITS_PER_WORD);
3907 }
3908
3909 /* For the arm-wince targets we choose to be compatible with Microsoft's
3910 ARM and Thumb compilers, which always return aggregates in memory. */
3911 #ifndef ARM_WINCE
3912 /* All structures/unions bigger than one word are returned in memory.
3913 Also catch the case where int_size_in_bytes returns -1. In this case
3914 the aggregate is either huge or of variable size, and in either case
3915 we will want to return it via memory and not in a register. */
3916 if (size < 0 || size > UNITS_PER_WORD)
3917 return true;
3918
3919 if (TREE_CODE (type) == RECORD_TYPE)
3920 {
3921 tree field;
3922
3923 /* For a struct the APCS says that we only return in a register
3924 if the type is 'integer like' and every addressable element
3925 has an offset of zero. For practical purposes this means
3926 that the structure can have at most one non bit-field element
3927 and that this element must be the first one in the structure. */
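/* As an illustration of these APCS rules: struct { int x; } is returned
   in r0, whereas struct { float f; }, or a struct whose first member is
   itself an aggregate that must be returned in memory, goes via memory.  */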
3928
3929 /* Find the first field, ignoring non FIELD_DECL things which will
3930 have been created by C++. */
3931 for (field = TYPE_FIELDS (type);
3932 field && TREE_CODE (field) != FIELD_DECL;
3933 field = DECL_CHAIN (field))
3934 continue;
3935
3936 if (field == NULL)
3937 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3938
3939 /* Check that the first field is valid for returning in a register. */
3940
3941 /* ... Floats are not allowed */
3942 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3943 return true;
3944
3945 /* ... Aggregates that are not themselves valid for returning in
3946 a register are not allowed. */
3947 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3948 return true;
3949
3950 /* Now check the remaining fields, if any. Only bitfields are allowed,
3951 since they are not addressable. */
3952 for (field = DECL_CHAIN (field);
3953 field;
3954 field = DECL_CHAIN (field))
3955 {
3956 if (TREE_CODE (field) != FIELD_DECL)
3957 continue;
3958
3959 if (!DECL_BIT_FIELD_TYPE (field))
3960 return true;
3961 }
3962
3963 return false;
3964 }
3965
3966 if (TREE_CODE (type) == UNION_TYPE)
3967 {
3968 tree field;
3969
3970 /* Unions can be returned in registers if every element is
3971 integral, or can be returned in an integer register. */
3972 for (field = TYPE_FIELDS (type);
3973 field;
3974 field = DECL_CHAIN (field))
3975 {
3976 if (TREE_CODE (field) != FIELD_DECL)
3977 continue;
3978
3979 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3980 return true;
3981
3982 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3983 return true;
3984 }
3985
3986 return false;
3987 }
3988 #endif /* not ARM_WINCE */
3989
3990 /* Return all other types in memory. */
3991 return true;
3992 }
3993
3994 const struct pcs_attribute_arg
3995 {
3996 const char *arg;
3997 enum arm_pcs value;
3998 } pcs_attribute_args[] =
3999 {
4000 {"aapcs", ARM_PCS_AAPCS},
4001 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
4002 #if 0
4003 /* We could recognize these, but changes would be needed elsewhere
4004 * to implement them. */
4005 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
4006 {"atpcs", ARM_PCS_ATPCS},
4007 {"apcs", ARM_PCS_APCS},
4008 #endif
4009 {NULL, ARM_PCS_UNKNOWN}
4010 };
4011
4012 static enum arm_pcs
4013 arm_pcs_from_attribute (tree attr)
4014 {
4015 const struct pcs_attribute_arg *ptr;
4016 const char *arg;
4017
4018 /* Get the value of the argument. */
4019 if (TREE_VALUE (attr) == NULL_TREE
4020 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4021 return ARM_PCS_UNKNOWN;
4022
4023 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4024
4025 /* Check it against the list of known arguments. */
4026 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4027 if (streq (arg, ptr->arg))
4028 return ptr->value;
4029
4030 /* An unrecognized PCS name. */
4031 return ARM_PCS_UNKNOWN;
4032 }
4033
4034 /* Get the PCS variant to use for this call. TYPE is the function's type
4035 specification, DECL is the specific declaration. DECL may be null if
4036 the call could be indirect or if this is a library call. */
4037 static enum arm_pcs
4038 arm_get_pcs_model (const_tree type, const_tree decl)
4039 {
4040 bool user_convention = false;
4041 enum arm_pcs user_pcs = arm_pcs_default;
4042 tree attr;
4043
4044 gcc_assert (type);
4045
4046 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4047 if (attr)
4048 {
4049 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4050 user_convention = true;
4051 }
4052
4053 if (TARGET_AAPCS_BASED)
4054 {
4055 /* Detect varargs functions. These always use the base rules
4056 (no argument is ever a candidate for a co-processor
4057 register). */
4058 bool base_rules = stdarg_p (type);
4059
4060 if (user_convention)
4061 {
4062 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4063 sorry ("non-AAPCS derived PCS variant");
4064 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4065 error ("variadic functions must use the base AAPCS variant");
4066 }
4067
4068 if (base_rules)
4069 return ARM_PCS_AAPCS;
4070 else if (user_convention)
4071 return user_pcs;
4072 else if (decl && flag_unit_at_a_time)
4073 {
4074 /* Local functions never leak outside this compilation unit,
4075 so we are free to use whatever conventions are
4076 appropriate. */
4077 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4078 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4079 if (i && i->local)
4080 return ARM_PCS_AAPCS_LOCAL;
4081 }
4082 }
4083 else if (user_convention && user_pcs != arm_pcs_default)
4084 sorry ("PCS variant");
4085
4086 /* For everything else we use the target's default. */
4087 return arm_pcs_default;
4088 }
4089
4090
4091 static void
4092 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4093 const_tree fntype ATTRIBUTE_UNUSED,
4094 rtx libcall ATTRIBUTE_UNUSED,
4095 const_tree fndecl ATTRIBUTE_UNUSED)
4096 {
4097 /* Record the unallocated VFP registers. */
4098 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4099 pcum->aapcs_vfp_reg_alloc = 0;
4100 }
4101
4102 /* Walk down the type tree of TYPE counting consecutive base elements.
4103 If *MODEP is VOIDmode, then set it to the first valid floating point
4104 type. If a non-floating point type is found, or if a floating point
4105 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4106 otherwise return the count in the sub-tree. */
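/* For example, struct { float a; float b; float c; } yields 3 with
   *MODEP == SFmode, while struct { double d; float f; } yields -1
   because the element modes differ.  */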
4107 static int
4108 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4109 {
4110 enum machine_mode mode;
4111 HOST_WIDE_INT size;
4112
4113 switch (TREE_CODE (type))
4114 {
4115 case REAL_TYPE:
4116 mode = TYPE_MODE (type);
4117 if (mode != DFmode && mode != SFmode)
4118 return -1;
4119
4120 if (*modep == VOIDmode)
4121 *modep = mode;
4122
4123 if (*modep == mode)
4124 return 1;
4125
4126 break;
4127
4128 case COMPLEX_TYPE:
4129 mode = TYPE_MODE (TREE_TYPE (type));
4130 if (mode != DFmode && mode != SFmode)
4131 return -1;
4132
4133 if (*modep == VOIDmode)
4134 *modep = mode;
4135
4136 if (*modep == mode)
4137 return 2;
4138
4139 break;
4140
4141 case VECTOR_TYPE:
4142 /* Use V2SImode and V4SImode as representatives of all 64-bit
4143 and 128-bit vector types, whether or not those modes are
4144 supported with the present options. */
4145 size = int_size_in_bytes (type);
4146 switch (size)
4147 {
4148 case 8:
4149 mode = V2SImode;
4150 break;
4151 case 16:
4152 mode = V4SImode;
4153 break;
4154 default:
4155 return -1;
4156 }
4157
4158 if (*modep == VOIDmode)
4159 *modep = mode;
4160
4161 /* Vector modes are considered to be opaque: two vectors are
4162 equivalent for the purposes of being homogeneous aggregates
4163 if they are the same size. */
4164 if (*modep == mode)
4165 return 1;
4166
4167 break;
4168
4169 case ARRAY_TYPE:
4170 {
4171 int count;
4172 tree index = TYPE_DOMAIN (type);
4173
4174 /* Can't handle incomplete types. */
4175 if (!COMPLETE_TYPE_P (type))
4176 return -1;
4177
4178 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4179 if (count == -1
4180 || !index
4181 || !TYPE_MAX_VALUE (index)
4182 || !host_integerp (TYPE_MAX_VALUE (index), 1)
4183 || !TYPE_MIN_VALUE (index)
4184 || !host_integerp (TYPE_MIN_VALUE (index), 1)
4185 || count < 0)
4186 return -1;
4187
4188 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4189 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4190
4191 /* There must be no padding. */
4192 if (!host_integerp (TYPE_SIZE (type), 1)
4193 || (tree_low_cst (TYPE_SIZE (type), 1)
4194 != count * GET_MODE_BITSIZE (*modep)))
4195 return -1;
4196
4197 return count;
4198 }
4199
4200 case RECORD_TYPE:
4201 {
4202 int count = 0;
4203 int sub_count;
4204 tree field;
4205
4206 /* Can't handle incomplete types. */
4207 if (!COMPLETE_TYPE_P (type))
4208 return -1;
4209
4210 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4211 {
4212 if (TREE_CODE (field) != FIELD_DECL)
4213 continue;
4214
4215 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4216 if (sub_count < 0)
4217 return -1;
4218 count += sub_count;
4219 }
4220
4221 /* There must be no padding. */
4222 if (!host_integerp (TYPE_SIZE (type), 1)
4223 || (tree_low_cst (TYPE_SIZE (type), 1)
4224 != count * GET_MODE_BITSIZE (*modep)))
4225 return -1;
4226
4227 return count;
4228 }
4229
4230 case UNION_TYPE:
4231 case QUAL_UNION_TYPE:
4232 {
4233 /* These aren't very interesting except in a degenerate case. */
4234 int count = 0;
4235 int sub_count;
4236 tree field;
4237
4238 /* Can't handle incomplete types. */
4239 if (!COMPLETE_TYPE_P (type))
4240 return -1;
4241
4242 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4243 {
4244 if (TREE_CODE (field) != FIELD_DECL)
4245 continue;
4246
4247 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4248 if (sub_count < 0)
4249 return -1;
4250 count = count > sub_count ? count : sub_count;
4251 }
4252
4253 /* There must be no padding. */
4254 if (!host_integerp (TYPE_SIZE (type), 1)
4255 || (tree_low_cst (TYPE_SIZE (type), 1)
4256 != count * GET_MODE_BITSIZE (*modep)))
4257 return -1;
4258
4259 return count;
4260 }
4261
4262 default:
4263 break;
4264 }
4265
4266 return -1;
4267 }
4268
4269 /* Return true if PCS_VARIANT should use VFP registers. */
4270 static bool
4271 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4272 {
4273 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4274 {
4275 static bool seen_thumb1_vfp = false;
4276
4277 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4278 {
4279 sorry ("Thumb-1 hard-float VFP ABI");
4280 /* sorry() is not immediately fatal, so only display this once. */
4281 seen_thumb1_vfp = true;
4282 }
4283
4284 return true;
4285 }
4286
4287 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4288 return false;
4289
4290 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
4291 && (TARGET_VFP_DOUBLE || !is_double));
4292 }
4293
4294 /* Return true if an argument whose type is TYPE, or mode is MODE, is
4295 suitable for passing or returning in VFP registers for the PCS
4296 variant selected. If it is, then *BASE_MODE is updated to contain
4297 a machine mode describing each element of the argument's type and
4298 *COUNT to hold the number of such elements. */
4299 static bool
4300 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4301 enum machine_mode mode, const_tree type,
4302 enum machine_mode *base_mode, int *count)
4303 {
4304 enum machine_mode new_mode = VOIDmode;
4305
4306 /* If we have the type information, prefer that to working things
4307 out from the mode. */
4308 if (type)
4309 {
4310 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4311
4312 if (ag_count > 0 && ag_count <= 4)
4313 *count = ag_count;
4314 else
4315 return false;
4316 }
4317 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
4318 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4319 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4320 {
4321 *count = 1;
4322 new_mode = mode;
4323 }
4324 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4325 {
4326 *count = 2;
4327 new_mode = (mode == DCmode ? DFmode : SFmode);
4328 }
4329 else
4330 return false;
4331
4332
4333 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4334 return false;
4335
4336 *base_mode = new_mode;
4337 return true;
4338 }
4339
4340 static bool
4341 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4342 enum machine_mode mode, const_tree type)
4343 {
4344 int count ATTRIBUTE_UNUSED;
4345 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4346
4347 if (!use_vfp_abi (pcs_variant, false))
4348 return false;
4349 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4350 &ag_mode, &count);
4351 }
4352
4353 static bool
4354 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4355 const_tree type)
4356 {
4357 if (!use_vfp_abi (pcum->pcs_variant, false))
4358 return false;
4359
4360 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4361 &pcum->aapcs_vfp_rmode,
4362 &pcum->aapcs_vfp_rcount);
4363 }
4364
4365 static bool
4366 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4367 const_tree type ATTRIBUTE_UNUSED)
4368 {
4369 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4370 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4371 int regno;
4372
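/* For example, for two DFmode elements SHIFT is 2 and MASK is 0xf, so the
   loop below searches for four consecutive free S registers starting at an
   even register number (i.e. a free D-register pair).  */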
4373 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4374 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4375 {
4376 pcum->aapcs_vfp_reg_alloc = mask << regno;
4377 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4378 {
4379 int i;
4380 int rcount = pcum->aapcs_vfp_rcount;
4381 int rshift = shift;
4382 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4383 rtx par;
4384 if (!TARGET_NEON)
4385 {
4386 /* Avoid using unsupported vector modes. */
4387 if (rmode == V2SImode)
4388 rmode = DImode;
4389 else if (rmode == V4SImode)
4390 {
4391 rmode = DImode;
4392 rcount *= 2;
4393 rshift /= 2;
4394 }
4395 }
4396 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4397 for (i = 0; i < rcount; i++)
4398 {
4399 rtx tmp = gen_rtx_REG (rmode,
4400 FIRST_VFP_REGNUM + regno + i * rshift);
4401 tmp = gen_rtx_EXPR_LIST
4402 (VOIDmode, tmp,
4403 GEN_INT (i * GET_MODE_SIZE (rmode)));
4404 XVECEXP (par, 0, i) = tmp;
4405 }
4406
4407 pcum->aapcs_reg = par;
4408 }
4409 else
4410 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4411 return true;
4412 }
4413 return false;
4414 }
4415
4416 static rtx
4417 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4418 enum machine_mode mode,
4419 const_tree type ATTRIBUTE_UNUSED)
4420 {
4421 if (!use_vfp_abi (pcs_variant, false))
4422 return NULL;
4423
4424 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4425 {
4426 int count;
4427 enum machine_mode ag_mode;
4428 int i;
4429 rtx par;
4430 int shift;
4431
4432 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4433 &ag_mode, &count);
4434
4435 if (!TARGET_NEON)
4436 {
4437 if (ag_mode == V2SImode)
4438 ag_mode = DImode;
4439 else if (ag_mode == V4SImode)
4440 {
4441 ag_mode = DImode;
4442 count *= 2;
4443 }
4444 }
4445 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4446 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4447 for (i = 0; i < count; i++)
4448 {
4449 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4450 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4451 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4452 XVECEXP (par, 0, i) = tmp;
4453 }
4454
4455 return par;
4456 }
4457
4458 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4459 }
4460
4461 static void
4462 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4463 enum machine_mode mode ATTRIBUTE_UNUSED,
4464 const_tree type ATTRIBUTE_UNUSED)
4465 {
4466 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4467 pcum->aapcs_vfp_reg_alloc = 0;
4468 return;
4469 }
4470
4471 #define AAPCS_CP(X) \
4472 { \
4473 aapcs_ ## X ## _cum_init, \
4474 aapcs_ ## X ## _is_call_candidate, \
4475 aapcs_ ## X ## _allocate, \
4476 aapcs_ ## X ## _is_return_candidate, \
4477 aapcs_ ## X ## _allocate_return_reg, \
4478 aapcs_ ## X ## _advance \
4479 }
4480
4481 /* Table of co-processors that can be used to pass arguments in
4482 registers. Ideally no argument should be a candidate for more than
4483 one co-processor table entry, but the table is processed in order
4484 and stops after the first match. If that entry then fails to put
4485 the argument into a co-processor register, the argument will go on
4486 the stack. */
4487 static struct
4488 {
4489 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4490 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4491
4492 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4493 BLKmode) is a candidate for this co-processor's registers; this
4494 function should ignore any position-dependent state in
4495 CUMULATIVE_ARGS and only use call-type dependent information. */
4496 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4497
4498 /* Return true if the argument does get a co-processor register; it
4499 should set aapcs_reg to an RTX of the register allocated as is
4500 required for a return from FUNCTION_ARG. */
4501 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4502
4503 /* Return true if a result of mode MODE (or type TYPE if MODE is
4504 BLKmode) can be returned in this co-processor's registers. */
4505 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4506
4507 /* Allocate and return an RTX element to hold the return value of a
4508 call; this routine must not fail and will only be called if
4509 is_return_candidate returned true with the same parameters. */
4510 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4511
4512 /* Finish processing this argument and prepare to start processing
4513 the next one. */
4514 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4515 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4516 {
4517 AAPCS_CP(vfp)
4518 };
4519
4520 #undef AAPCS_CP
4521
4522 static int
4523 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4524 const_tree type)
4525 {
4526 int i;
4527
4528 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4529 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4530 return i;
4531
4532 return -1;
4533 }
4534
4535 static int
4536 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4537 {
4538 /* We aren't passed a decl, so we can't check that a call is local.
4539 However, it isn't clear that that would be a win anyway, since it
4540 might limit some tail-calling opportunities. */
4541 enum arm_pcs pcs_variant;
4542
4543 if (fntype)
4544 {
4545 const_tree fndecl = NULL_TREE;
4546
4547 if (TREE_CODE (fntype) == FUNCTION_DECL)
4548 {
4549 fndecl = fntype;
4550 fntype = TREE_TYPE (fntype);
4551 }
4552
4553 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4554 }
4555 else
4556 pcs_variant = arm_pcs_default;
4557
4558 if (pcs_variant != ARM_PCS_AAPCS)
4559 {
4560 int i;
4561
4562 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4563 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4564 TYPE_MODE (type),
4565 type))
4566 return i;
4567 }
4568 return -1;
4569 }
4570
4571 static rtx
4572 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4573 const_tree fntype)
4574 {
4575 /* We aren't passed a decl, so we can't check that a call is local.
4576 However, it isn't clear that that would be a win anyway, since it
4577 might limit some tail-calling opportunities. */
4578 enum arm_pcs pcs_variant;
4579 int unsignedp ATTRIBUTE_UNUSED;
4580
4581 if (fntype)
4582 {
4583 const_tree fndecl = NULL_TREE;
4584
4585 if (TREE_CODE (fntype) == FUNCTION_DECL)
4586 {
4587 fndecl = fntype;
4588 fntype = TREE_TYPE (fntype);
4589 }
4590
4591 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4592 }
4593 else
4594 pcs_variant = arm_pcs_default;
4595
4596 /* Promote integer types. */
4597 if (type && INTEGRAL_TYPE_P (type))
4598 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4599
4600 if (pcs_variant != ARM_PCS_AAPCS)
4601 {
4602 int i;
4603
4604 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4605 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4606 type))
4607 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4608 mode, type);
4609 }
4610
4611 /* Promote small structs returned in a register to full-word size
4612 for big-endian AAPCS. */
4613 if (type && arm_return_in_msb (type))
4614 {
4615 HOST_WIDE_INT size = int_size_in_bytes (type);
4616 if (size % UNITS_PER_WORD != 0)
4617 {
4618 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4619 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4620 }
4621 }
4622
4623 return gen_rtx_REG (mode, R0_REGNUM);
4624 }
4625
4626 static rtx
4627 aapcs_libcall_value (enum machine_mode mode)
4628 {
4629 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4630 && GET_MODE_SIZE (mode) <= 4)
4631 mode = SImode;
4632
4633 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4634 }
4635
4636 /* Lay out a function argument using the AAPCS rules. The rule
4637 numbers referred to here are those in the AAPCS. */
4638 static void
4639 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4640 const_tree type, bool named)
4641 {
4642 int nregs, nregs2;
4643 int ncrn;
4644
4645 /* We only need to do this once per argument. */
4646 if (pcum->aapcs_arg_processed)
4647 return;
4648
4649 pcum->aapcs_arg_processed = true;
4650
4651 /* Special case: if named is false then we are handling an incoming
4652 anonymous argument which is on the stack. */
4653 if (!named)
4654 return;
4655
4656 /* Is this a potential co-processor register candidate? */
4657 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4658 {
4659 int slot = aapcs_select_call_coproc (pcum, mode, type);
4660 pcum->aapcs_cprc_slot = slot;
4661
4662 /* We don't have to apply any of the rules from part B of the
4663 preparation phase; these are handled elsewhere in the
4664 compiler. */
4665
4666 if (slot >= 0)
4667 {
4668 /* A Co-processor register candidate goes either in its own
4669 class of registers or on the stack. */
4670 if (!pcum->aapcs_cprc_failed[slot])
4671 {
4672 /* C1.cp - Try to allocate the argument to co-processor
4673 registers. */
4674 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4675 return;
4676
4677 /* C2.cp - Put the argument on the stack and note that we
4678 can't assign any more candidates in this slot. We also
4679 need to note that we have allocated stack space, so that
4680 we won't later try to split a non-cprc candidate between
4681 core registers and the stack. */
4682 pcum->aapcs_cprc_failed[slot] = true;
4683 pcum->can_split = false;
4684 }
4685
4686 /* We didn't get a register, so this argument goes on the
4687 stack. */
4688 gcc_assert (pcum->can_split == false);
4689 return;
4690 }
4691 }
4692
4693 /* C3 - For double-word aligned arguments, round the NCRN up to the
4694 next even number. */
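/* For example, a DFmode argument arriving with NCRN == 1 needs double-word
   alignment, so NCRN is rounded up to 2 and the value goes in r2/r3,
   leaving r1 unused.  */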
4695 ncrn = pcum->aapcs_ncrn;
4696 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4697 ncrn++;
4698
4699 nregs = ARM_NUM_REGS2(mode, type);
4700
4701 /* Sigh, this test should really assert that nregs > 0, but a GCC
4702 extension allows empty structs and then gives them empty size; it
4703 then allows such a structure to be passed by value. For some of
4704 the code below we have to pretend that such an argument has
4705 non-zero size so that we 'locate' it correctly either in
4706 registers or on the stack. */
4707 gcc_assert (nregs >= 0);
4708
4709 nregs2 = nregs ? nregs : 1;
4710
4711 /* C4 - Argument fits entirely in core registers. */
4712 if (ncrn + nregs2 <= NUM_ARG_REGS)
4713 {
4714 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4715 pcum->aapcs_next_ncrn = ncrn + nregs;
4716 return;
4717 }
4718
4719 /* C5 - Some core registers left and there are no arguments already
4720 on the stack: split this argument between the remaining core
4721 registers and the stack. */
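/* For example, assuming no earlier argument has already gone on the stack,
   a 16-byte structure arriving with NCRN == 2 does not fit entirely in
   registers, so r2 and r3 take the first 8 bytes (AAPCS_PARTIAL == 8) and
   the remaining 8 bytes continue on the stack.  */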
4722 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4723 {
4724 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4725 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4726 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4727 return;
4728 }
4729
4730 /* C6 - NCRN is set to 4. */
4731 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4732
4733 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
4734 return;
4735 }
4736
4737 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4738 for a call to a function whose data type is FNTYPE.
4739 For a library call, FNTYPE is NULL. */
4740 void
4741 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4742 rtx libname,
4743 tree fndecl ATTRIBUTE_UNUSED)
4744 {
4745 /* Long call handling. */
4746 if (fntype)
4747 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4748 else
4749 pcum->pcs_variant = arm_pcs_default;
4750
4751 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4752 {
4753 if (arm_libcall_uses_aapcs_base (libname))
4754 pcum->pcs_variant = ARM_PCS_AAPCS;
4755
4756 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4757 pcum->aapcs_reg = NULL_RTX;
4758 pcum->aapcs_partial = 0;
4759 pcum->aapcs_arg_processed = false;
4760 pcum->aapcs_cprc_slot = -1;
4761 pcum->can_split = true;
4762
4763 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4764 {
4765 int i;
4766
4767 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4768 {
4769 pcum->aapcs_cprc_failed[i] = false;
4770 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4771 }
4772 }
4773 return;
4774 }
4775
4776 /* Legacy ABIs */
4777
4778 /* On the ARM, the offset starts at 0. */
4779 pcum->nregs = 0;
4780 pcum->iwmmxt_nregs = 0;
4781 pcum->can_split = true;
4782
4783 /* Varargs vectors are treated the same as long long.
4784 named_count avoids having to change the way arm handles 'named'. */
4785 pcum->named_count = 0;
4786 pcum->nargs = 0;
4787
4788 if (TARGET_REALLY_IWMMXT && fntype)
4789 {
4790 tree fn_arg;
4791
4792 for (fn_arg = TYPE_ARG_TYPES (fntype);
4793 fn_arg;
4794 fn_arg = TREE_CHAIN (fn_arg))
4795 pcum->named_count += 1;
4796
4797 if (! pcum->named_count)
4798 pcum->named_count = INT_MAX;
4799 }
4800 }
4801
4802
4803 /* Return true if mode/type need doubleword alignment. */
4804 static bool
4805 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4806 {
4807 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4808 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4809 }
4810
4811
4812 /* Determine where to put an argument to a function.
4813 Value is zero to push the argument on the stack,
4814 or a hard register in which to store the argument.
4815
4816 MODE is the argument's machine mode.
4817 TYPE is the data type of the argument (as a tree).
4818 This is null for libcalls where that information may
4819 not be available.
4820 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4821 the preceding args and about the function being called.
4822 NAMED is nonzero if this argument is a named parameter
4823 (otherwise it is an extra parameter matching an ellipsis).
4824
4825 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4826 other arguments are passed on the stack. If (NAMED == 0) (which happens
4827 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4828 defined), say it is passed on the stack (function_prologue will
4829 indeed make it be passed on the stack if necessary). */
4830
4831 static rtx
4832 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4833 const_tree type, bool named)
4834 {
4835 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4836 int nregs;
4837
4838 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4839 a call insn (op3 of a call_value insn). */
4840 if (mode == VOIDmode)
4841 return const0_rtx;
4842
4843 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4844 {
4845 aapcs_layout_arg (pcum, mode, type, named);
4846 return pcum->aapcs_reg;
4847 }
4848
4849 /* Varargs vectors are treated the same as long long.
4850 named_count avoids having to change the way arm handles 'named'. */
4851 if (TARGET_IWMMXT_ABI
4852 && arm_vector_mode_supported_p (mode)
4853 && pcum->named_count > pcum->nargs + 1)
4854 {
4855 if (pcum->iwmmxt_nregs <= 9)
4856 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4857 else
4858 {
4859 pcum->can_split = false;
4860 return NULL_RTX;
4861 }
4862 }
4863
4864 /* Put doubleword aligned quantities in even register pairs. */
4865 if (pcum->nregs & 1
4866 && ARM_DOUBLEWORD_ALIGN
4867 && arm_needs_doubleword_align (mode, type))
4868 pcum->nregs++;
4869
4870 /* Only allow splitting an arg between regs and memory if all preceding
4871 args were allocated to regs. For args passed by reference we only count
4872 the reference pointer. */
4873 if (pcum->can_split)
4874 nregs = 1;
4875 else
4876 nregs = ARM_NUM_REGS2 (mode, type);
4877
4878 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4879 return NULL_RTX;
4880
4881 return gen_rtx_REG (mode, pcum->nregs);
4882 }
4883
4884 static unsigned int
4885 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4886 {
4887 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4888 ? DOUBLEWORD_ALIGNMENT
4889 : PARM_BOUNDARY);
4890 }
4891
4892 static int
4893 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
4894 tree type, bool named)
4895 {
4896 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4897 int nregs = pcum->nregs;
4898
4899 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4900 {
4901 aapcs_layout_arg (pcum, mode, type, named);
4902 return pcum->aapcs_partial;
4903 }
4904
4905 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4906 return 0;
4907
4908 if (NUM_ARG_REGS > nregs
4909 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4910 && pcum->can_split)
4911 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4912
4913 return 0;
4914 }
4915
4916 /* Update the data in PCUM to advance over an argument
4917 of mode MODE and data type TYPE.
4918 (TYPE is null for libcalls where that information may not be available.) */
4919
4920 static void
4921 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
4922 const_tree type, bool named)
4923 {
4924 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4925
4926 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4927 {
4928 aapcs_layout_arg (pcum, mode, type, named);
4929
4930 if (pcum->aapcs_cprc_slot >= 0)
4931 {
4932 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4933 type);
4934 pcum->aapcs_cprc_slot = -1;
4935 }
4936
4937 /* Generic stuff. */
4938 pcum->aapcs_arg_processed = false;
4939 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4940 pcum->aapcs_reg = NULL_RTX;
4941 pcum->aapcs_partial = 0;
4942 }
4943 else
4944 {
4945 pcum->nargs += 1;
4946 if (arm_vector_mode_supported_p (mode)
4947 && pcum->named_count > pcum->nargs
4948 && TARGET_IWMMXT_ABI)
4949 pcum->iwmmxt_nregs += 1;
4950 else
4951 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4952 }
4953 }
4954
4955 /* Variable sized types are passed by reference. This is a GCC
4956 extension to the ARM ABI. */
4957
4958 static bool
4959 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
4960 enum machine_mode mode ATTRIBUTE_UNUSED,
4961 const_tree type, bool named ATTRIBUTE_UNUSED)
4962 {
4963 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4964 }
4965 \f
4966 /* Encode the current state of the #pragma [no_]long_calls. */
4967 typedef enum
4968 {
4969 OFF, /* No #pragma [no_]long_calls is in effect. */
4970 LONG, /* #pragma long_calls is in effect. */
4971 SHORT /* #pragma no_long_calls is in effect. */
4972 } arm_pragma_enum;
4973
4974 static arm_pragma_enum arm_pragma_long_calls = OFF;
4975
4976 void
4977 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4978 {
4979 arm_pragma_long_calls = LONG;
4980 }
4981
4982 void
4983 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4984 {
4985 arm_pragma_long_calls = SHORT;
4986 }
4987
4988 void
4989 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4990 {
4991 arm_pragma_long_calls = OFF;
4992 }
4993 \f
4994 /* Handle an attribute requiring a FUNCTION_DECL;
4995 arguments as in struct attribute_spec.handler. */
4996 static tree
4997 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4998 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4999 {
5000 if (TREE_CODE (*node) != FUNCTION_DECL)
5001 {
5002 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5003 name);
5004 *no_add_attrs = true;
5005 }
5006
5007 return NULL_TREE;
5008 }
5009
5010 /* Handle an "interrupt" or "isr" attribute;
5011 arguments as in struct attribute_spec.handler. */
5012 static tree
5013 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5014 bool *no_add_attrs)
5015 {
5016 if (DECL_P (*node))
5017 {
5018 if (TREE_CODE (*node) != FUNCTION_DECL)
5019 {
5020 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5021 name);
5022 *no_add_attrs = true;
5023 }
5024 /* FIXME: the argument if any is checked for type attributes;
5025 should it be checked for decl ones? */
5026 }
5027 else
5028 {
5029 if (TREE_CODE (*node) == FUNCTION_TYPE
5030 || TREE_CODE (*node) == METHOD_TYPE)
5031 {
5032 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5033 {
5034 warning (OPT_Wattributes, "%qE attribute ignored",
5035 name);
5036 *no_add_attrs = true;
5037 }
5038 }
5039 else if (TREE_CODE (*node) == POINTER_TYPE
5040 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5041 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5042 && arm_isr_value (args) != ARM_FT_UNKNOWN)
5043 {
5044 *node = build_variant_type_copy (*node);
5045 TREE_TYPE (*node) = build_type_attribute_variant
5046 (TREE_TYPE (*node),
5047 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5048 *no_add_attrs = true;
5049 }
5050 else
5051 {
5052 /* Possibly pass this attribute on from the type to a decl. */
5053 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5054 | (int) ATTR_FLAG_FUNCTION_NEXT
5055 | (int) ATTR_FLAG_ARRAY_NEXT))
5056 {
5057 *no_add_attrs = true;
5058 return tree_cons (name, args, NULL_TREE);
5059 }
5060 else
5061 {
5062 warning (OPT_Wattributes, "%qE attribute ignored",
5063 name);
5064 }
5065 }
5066 }
5067
5068 return NULL_TREE;
5069 }
5070
5071 /* Handle a "pcs" attribute; arguments as in struct
5072 attribute_spec.handler. */
5073 static tree
5074 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5075 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5076 {
5077 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5078 {
5079 warning (OPT_Wattributes, "%qE attribute ignored", name);
5080 *no_add_attrs = true;
5081 }
5082 return NULL_TREE;
5083 }
5084
5085 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5086 /* Handle the "notshared" attribute. This attribute is another way of
5087 requesting hidden visibility. ARM's compiler supports
5088 "__declspec(notshared)"; we support the same thing via an
5089 attribute. */
5090
5091 static tree
5092 arm_handle_notshared_attribute (tree *node,
5093 tree name ATTRIBUTE_UNUSED,
5094 tree args ATTRIBUTE_UNUSED,
5095 int flags ATTRIBUTE_UNUSED,
5096 bool *no_add_attrs)
5097 {
5098 tree decl = TYPE_NAME (*node);
5099
5100 if (decl)
5101 {
5102 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5103 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5104 *no_add_attrs = false;
5105 }
5106 return NULL_TREE;
5107 }
5108 #endif
5109
5110 /* Return 0 if the attributes for two types are incompatible, 1 if they
5111 are compatible, and 2 if they are nearly compatible (which causes a
5112 warning to be generated). */
5113 static int
5114 arm_comp_type_attributes (const_tree type1, const_tree type2)
5115 {
5116 int l1, l2, s1, s2;
5117
5118 /* Check for mismatch of non-default calling convention. */
5119 if (TREE_CODE (type1) != FUNCTION_TYPE)
5120 return 1;
5121
5122 /* Check for mismatched call attributes. */
5123 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5124 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5125 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5126 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5127
5128 /* Only bother to check if an attribute is defined. */
5129 if (l1 | l2 | s1 | s2)
5130 {
5131 /* If one type has an attribute, the other must have the same attribute. */
5132 if ((l1 != l2) || (s1 != s2))
5133 return 0;
5134
5135 /* Disallow mixed attributes. */
5136 if ((l1 & s2) || (l2 & s1))
5137 return 0;
5138 }
5139
5140 /* Check for mismatched ISR attribute. */
5141 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5142 if (! l1)
5143 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5144 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5145 if (! l2)
5146 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5147 if (l1 != l2)
5148 return 0;
5149
5150 return 1;
5151 }
5152
5153 /* Assigns default attributes to newly defined type. This is used to
5154 set short_call/long_call attributes for function types of
5155 functions defined inside corresponding #pragma scopes. */
5156 static void
5157 arm_set_default_type_attributes (tree type)
5158 {
5159 /* Add __attribute__ ((long_call)) to all functions when inside
5160 #pragma long_calls, or __attribute__ ((short_call)) when inside
5161 #pragma no_long_calls. */
5162 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5163 {
5164 tree type_attr_list, attr_name;
5165 type_attr_list = TYPE_ATTRIBUTES (type);
5166
5167 if (arm_pragma_long_calls == LONG)
5168 attr_name = get_identifier ("long_call");
5169 else if (arm_pragma_long_calls == SHORT)
5170 attr_name = get_identifier ("short_call");
5171 else
5172 return;
5173
5174 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5175 TYPE_ATTRIBUTES (type) = type_attr_list;
5176 }
5177 }
5178 \f
5179 /* Return true if DECL is known to be linked into section SECTION. */
5180
5181 static bool
5182 arm_function_in_section_p (tree decl, section *section)
5183 {
5184 /* We can only be certain about functions defined in the same
5185 compilation unit. */
5186 if (!TREE_STATIC (decl))
5187 return false;
5188
5189 /* Make sure that SYMBOL always binds to the definition in this
5190 compilation unit. */
5191 if (!targetm.binds_local_p (decl))
5192 return false;
5193
5194 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5195 if (!DECL_SECTION_NAME (decl))
5196 {
5197 /* Make sure that we will not create a unique section for DECL. */
5198 if (flag_function_sections || DECL_ONE_ONLY (decl))
5199 return false;
5200 }
5201
5202 return function_section (decl) == section;
5203 }
5204
5205 /* Return nonzero if a 32-bit "long_call" should be generated for
5206 a call from the current function to DECL. We generate a long_call
5207 if the function:
5208
5209 a. has an __attribute__ ((long_call))
5210 or b. is within the scope of a #pragma long_calls
5211 or c. the -mlong-calls command line switch has been specified
5212
5213 However we do not generate a long call if the function:
5214
5215 d. has an __attribute__ ((short_call))
5216 or e. is inside the scope of a #pragma no_long_calls
5217 or f. is defined in the same section as the current function. */
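/* For example, a declaration such as
   extern void foo (void) __attribute__ ((long_call));
   makes this predicate true for calls to foo even without -mlong-calls,
   while a short_call attribute or an enclosing #pragma no_long_calls
   forces it false.  */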
5218
5219 bool
5220 arm_is_long_call_p (tree decl)
5221 {
5222 tree attrs;
5223
5224 if (!decl)
5225 return TARGET_LONG_CALLS;
5226
5227 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5228 if (lookup_attribute ("short_call", attrs))
5229 return false;
5230
5231 /* For "f", be conservative, and only cater for cases in which the
5232 whole of the current function is placed in the same section. */
5233 if (!flag_reorder_blocks_and_partition
5234 && TREE_CODE (decl) == FUNCTION_DECL
5235 && arm_function_in_section_p (decl, current_function_section ()))
5236 return false;
5237
5238 if (lookup_attribute ("long_call", attrs))
5239 return true;
5240
5241 return TARGET_LONG_CALLS;
5242 }
5243
5244 /* Return nonzero if it is ok to make a tail-call to DECL. */
5245 static bool
5246 arm_function_ok_for_sibcall (tree decl, tree exp)
5247 {
5248 unsigned long func_type;
5249
5250 if (cfun->machine->sibcall_blocked)
5251 return false;
5252
5253 /* Never tailcall something for which we have no decl, or if we
5254 are generating code for Thumb-1. */
5255 if (decl == NULL || TARGET_THUMB1)
5256 return false;
5257
5258 /* The PIC register is live on entry to VxWorks PLT entries, so we
5259 must make the call before restoring the PIC register. */
5260 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5261 return false;
5262
5263 /* Cannot tail-call to long calls, since these are out of range of
5264 a branch instruction. */
5265 if (arm_is_long_call_p (decl))
5266 return false;
5267
5268 /* If we are interworking and the function is not declared static
5269 then we can't tail-call it unless we know that it exists in this
5270 compilation unit (since it might be a Thumb routine). */
5271 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5272 return false;
5273
5274 func_type = arm_current_func_type ();
5275 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5276 if (IS_INTERRUPT (func_type))
5277 return false;
5278
5279 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5280 {
5281 /* Check that the return value locations are the same. For
5282 example that we aren't returning a value from the sibling in
5283 a VFP register but then need to transfer it to a core
5284 register. */
5285 rtx a, b;
5286
5287 a = arm_function_value (TREE_TYPE (exp), decl, false);
5288 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5289 cfun->decl, false);
5290 if (!rtx_equal_p (a, b))
5291 return false;
5292 }
5293
5294 /* Never tailcall if function may be called with a misaligned SP. */
5295 if (IS_STACKALIGN (func_type))
5296 return false;
5297
5298 /* The AAPCS says that, on bare-metal, calls to unresolved weak
5299 references should become a NOP. Don't convert such calls into
5300 sibling calls. */
5301 if (TARGET_AAPCS_BASED
5302 && arm_abi == ARM_ABI_AAPCS
5303 && DECL_WEAK (decl))
5304 return false;
5305
5306 /* Everything else is ok. */
5307 return true;
5308 }
5309
5310 \f
5311 /* Addressing mode support functions. */
5312
5313 /* Return nonzero if X is a legitimate immediate operand when compiling
5314 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5315 int
5316 legitimate_pic_operand_p (rtx x)
5317 {
5318 if (GET_CODE (x) == SYMBOL_REF
5319 || (GET_CODE (x) == CONST
5320 && GET_CODE (XEXP (x, 0)) == PLUS
5321 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5322 return 0;
5323
5324 return 1;
5325 }
5326
5327 /* Record that the current function needs a PIC register. Initialize
5328 cfun->machine->pic_reg if we have not already done so. */
5329
5330 static void
5331 require_pic_register (void)
5332 {
5333 /* A lot of the logic here is made obscure by the fact that this
5334 routine gets called as part of the rtx cost estimation process.
5335 We don't want those calls to affect any assumptions about the real
5336 function; and further, we can't call entry_of_function() until we
5337 start the real expansion process. */
5338 if (!crtl->uses_pic_offset_table)
5339 {
5340 gcc_assert (can_create_pseudo_p ());
5341 if (arm_pic_register != INVALID_REGNUM)
5342 {
5343 if (!cfun->machine->pic_reg)
5344 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5345
5346 /* Play games to avoid marking the function as needing pic
5347 if we are being called as part of the cost-estimation
5348 process. */
5349 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5350 crtl->uses_pic_offset_table = 1;
5351 }
5352 else
5353 {
5354 rtx seq, insn;
5355
5356 if (!cfun->machine->pic_reg)
5357 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5358
5359 /* Play games to avoid marking the function as needing pic
5360 if we are being called as part of the cost-estimation
5361 process. */
5362 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5363 {
5364 crtl->uses_pic_offset_table = 1;
5365 start_sequence ();
5366
5367 arm_load_pic_register (0UL);
5368
5369 seq = get_insns ();
5370 end_sequence ();
5371
5372 for (insn = seq; insn; insn = NEXT_INSN (insn))
5373 if (INSN_P (insn))
5374 INSN_LOCATION (insn) = prologue_location;
5375
5376 /* We can be called during expansion of PHI nodes, where
5377 we can't yet emit instructions directly in the final
5378 insn stream. Queue the insns on the entry edge; they will
5379 be committed after everything else is expanded. */
5380 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5381 }
5382 }
5383 }
5384 }
5385
5386 rtx
5387 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5388 {
5389 if (GET_CODE (orig) == SYMBOL_REF
5390 || GET_CODE (orig) == LABEL_REF)
5391 {
5392 rtx insn;
5393
5394 if (reg == 0)
5395 {
5396 gcc_assert (can_create_pseudo_p ());
5397 reg = gen_reg_rtx (Pmode);
5398 }
5399
5400 /* VxWorks does not impose a fixed gap between segments; the run-time
5401 gap can be different from the object-file gap. We therefore can't
5402 use GOTOFF unless we are absolutely sure that the symbol is in the
5403 same segment as the GOT. Unfortunately, the flexibility of linker
5404 scripts means that we can't be sure of that in general, so assume
5405 that GOTOFF is never valid on VxWorks. */
5406 if ((GET_CODE (orig) == LABEL_REF
5407 || (GET_CODE (orig) == SYMBOL_REF &&
5408 SYMBOL_REF_LOCAL_P (orig)))
5409 && NEED_GOT_RELOC
5410 && !TARGET_VXWORKS_RTP)
5411 insn = arm_pic_static_addr (orig, reg);
5412 else
5413 {
5414 rtx pat;
5415 rtx mem;
5416
5417 /* If this function doesn't have a pic register, create one now. */
5418 require_pic_register ();
5419
5420 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5421
5422 /* Make the MEM as close to a constant as possible. */
5423 mem = SET_SRC (pat);
5424 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5425 MEM_READONLY_P (mem) = 1;
5426 MEM_NOTRAP_P (mem) = 1;
5427
5428 insn = emit_insn (pat);
5429 }
5430
5431 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5432 by the loop optimizer. */
5433 set_unique_reg_note (insn, REG_EQUAL, orig);
5434
5435 return reg;
5436 }
5437 else if (GET_CODE (orig) == CONST)
5438 {
5439 rtx base, offset;
5440
5441 if (GET_CODE (XEXP (orig, 0)) == PLUS
5442 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5443 return orig;
5444
5445 /* Handle the case where we have: const (UNSPEC_TLS). */
5446 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5447 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5448 return orig;
5449
5450 /* Handle the case where we have:
5451 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5452 CONST_INT. */
5453 if (GET_CODE (XEXP (orig, 0)) == PLUS
5454 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5455 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5456 {
5457 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
5458 return orig;
5459 }
5460
5461 if (reg == 0)
5462 {
5463 gcc_assert (can_create_pseudo_p ());
5464 reg = gen_reg_rtx (Pmode);
5465 }
5466
5467 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5468
5469 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5470 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5471 base == reg ? 0 : reg);
5472
5473 if (CONST_INT_P (offset))
5474 {
5475 /* The base register doesn't really matter, we only want to
5476 test the index for the appropriate mode. */
5477 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5478 {
5479 gcc_assert (can_create_pseudo_p ());
5480 offset = force_reg (Pmode, offset);
5481 }
5482
5483 if (CONST_INT_P (offset))
5484 return plus_constant (Pmode, base, INTVAL (offset));
5485 }
5486
5487 if (GET_MODE_SIZE (mode) > 4
5488 && (GET_MODE_CLASS (mode) == MODE_INT
5489 || TARGET_SOFT_FLOAT))
5490 {
5491 emit_insn (gen_addsi3 (reg, base, offset));
5492 return reg;
5493 }
5494
5495 return gen_rtx_PLUS (Pmode, base, offset);
5496 }
5497
5498 return orig;
5499 }
5500
5501
5502 /* Find a spare register to use during the prolog of a function. */
5503
5504 static int
5505 thumb_find_work_register (unsigned long pushed_regs_mask)
5506 {
5507 int reg;
5508
5509 /* Check the argument registers first as these are call-used. The
5510 register allocation order means that sometimes r3 might be used
5511 but earlier argument registers might not, so check them all. */
5512 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5513 if (!df_regs_ever_live_p (reg))
5514 return reg;
5515
5516 /* Before going on to check the call-saved registers we can try a couple
5517 more ways of deducing that r3 is available. The first is when we are
5518 pushing anonymous arguments onto the stack and we have less than 4
5519 registers worth of fixed arguments(*). In this case r3 will be part of
5520 the variable argument list and so we can be sure that it will be
5521 pushed right at the start of the function. Hence it will be available
5522 for the rest of the prologue.
5523 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5524 if (cfun->machine->uses_anonymous_args
5525 && crtl->args.pretend_args_size > 0)
5526 return LAST_ARG_REGNUM;
5527
5528 /* The other case is when we have fixed arguments but less than 4 registers
5529 worth. In this case r3 might be used in the body of the function, but
5530 it is not being used to convey an argument into the function. In theory
5531 we could just check crtl->args.size to see how many bytes are
5532 being passed in argument registers, but it seems that it is unreliable.
5533 Sometimes it will have the value 0 when in fact arguments are being
5534 passed. (See testcase execute/20021111-1.c for an example). So we also
5535 check the args_info.nregs field as well. The problem with this field is
5536 that it makes no allowances for arguments that are passed to the
5537 function but which are not used. Hence we could miss an opportunity
5538 when a function has an unused argument in r3. But it is better to be
5539 safe than sorry. */
5540 if (! cfun->machine->uses_anonymous_args
5541 && crtl->args.size >= 0
5542 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5543 && crtl->args.info.nregs < 4)
5544 return LAST_ARG_REGNUM;
5545
5546 /* Otherwise look for a call-saved register that is going to be pushed. */
5547 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5548 if (pushed_regs_mask & (1 << reg))
5549 return reg;
5550
5551 if (TARGET_THUMB2)
5552 {
5553 /* Thumb-2 can use high regs. */
5554 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5555 if (pushed_regs_mask & (1 << reg))
5556 return reg;
5557 }
5558 /* Something went wrong - thumb_compute_save_reg_mask()
5559 should have arranged for a suitable register to be pushed. */
5560 gcc_unreachable ();
5561 }
5562
5563 static GTY(()) int pic_labelno;
5564
5565 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5566 low register. */
5567
5568 void
5569 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5570 {
5571 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5572
5573 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5574 return;
5575
5576 gcc_assert (flag_pic);
5577
5578 pic_reg = cfun->machine->pic_reg;
5579 if (TARGET_VXWORKS_RTP)
5580 {
5581 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5582 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5583 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5584
5585 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5586
5587 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5588 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5589 }
5590 else
5591 {
5592 /* We use an UNSPEC rather than a LABEL_REF because this label
5593 never appears in the code stream. */
5594
5595 labelno = GEN_INT (pic_labelno++);
5596 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5597 l1 = gen_rtx_CONST (VOIDmode, l1);
5598
5599 /* On the ARM the PC register contains 'dot + 8' at the time of the
5600 addition, on the Thumb it is 'dot + 4'. */
5601 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5602 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5603 UNSPEC_GOTSYM_OFF);
5604 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5605
5606 if (TARGET_32BIT)
5607 {
5608 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5609 }
5610 else /* TARGET_THUMB1 */
5611 {
5612 if (arm_pic_register != INVALID_REGNUM
5613 && REGNO (pic_reg) > LAST_LO_REGNUM)
5614 {
5615 /* We will have pushed the pic register, so we should always be
5616 able to find a work register. */
5617 pic_tmp = gen_rtx_REG (SImode,
5618 thumb_find_work_register (saved_regs));
5619 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5620 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5621 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5622 }
5623 else
5624 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5625 }
5626 }
5627
5628 /* Need to emit this whether or not we obey regdecls,
5629 since setjmp/longjmp can cause life info to screw up. */
5630 emit_use (pic_reg);
5631 }
5632
5633 /* Generate code to load the address of a static var when flag_pic is set. */
5634 static rtx
5635 arm_pic_static_addr (rtx orig, rtx reg)
5636 {
5637 rtx l1, labelno, offset_rtx, insn;
5638
5639 gcc_assert (flag_pic);
5640
5641 /* We use an UNSPEC rather than a LABEL_REF because this label
5642 never appears in the code stream. */
5643 labelno = GEN_INT (pic_labelno++);
5644 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5645 l1 = gen_rtx_CONST (VOIDmode, l1);
5646
5647 /* On the ARM the PC register contains 'dot + 8' at the time of the
5648 addition, on the Thumb it is 'dot + 4'. */
5649 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5650 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5651 UNSPEC_SYMBOL_OFFSET);
5652 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5653
5654 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
5655 return insn;
5656 }
5657
5658 /* Return nonzero if X is valid as an ARM state addressing register. */
5659 static int
5660 arm_address_register_rtx_p (rtx x, int strict_p)
5661 {
5662 int regno;
5663
5664 if (!REG_P (x))
5665 return 0;
5666
5667 regno = REGNO (x);
5668
5669 if (strict_p)
5670 return ARM_REGNO_OK_FOR_BASE_P (regno);
5671
5672 return (regno <= LAST_ARM_REGNUM
5673 || regno >= FIRST_PSEUDO_REGISTER
5674 || regno == FRAME_POINTER_REGNUM
5675 || regno == ARG_POINTER_REGNUM);
5676 }
5677
5678 /* Return TRUE if this rtx is the difference of a symbol and a label,
5679 and will reduce to a PC-relative relocation in the object file.
5680 Expressions like this can be left alone when generating PIC, rather
5681 than forced through the GOT. */
5682 static int
5683 pcrel_constant_p (rtx x)
5684 {
5685 if (GET_CODE (x) == MINUS)
5686 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5687
5688 return FALSE;
5689 }
5690
5691 /* Return true if X will surely end up in an index register after the next
5692 splitting pass. */
5693 static bool
5694 will_be_in_index_register (const_rtx x)
5695 {
5696 /* arm.md: calculate_pic_address will split this into a register. */
5697 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
5698 }
5699
5700 /* Return nonzero if X is a valid ARM state address operand. */
5701 int
5702 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5703 int strict_p)
5704 {
5705 bool use_ldrd;
5706 enum rtx_code code = GET_CODE (x);
5707
5708 if (arm_address_register_rtx_p (x, strict_p))
5709 return 1;
5710
5711 use_ldrd = (TARGET_LDRD
5712 && (mode == DImode
5713 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5714
5715 if (code == POST_INC || code == PRE_DEC
5716 || ((code == PRE_INC || code == POST_DEC)
5717 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5718 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5719
5720 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5721 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5722 && GET_CODE (XEXP (x, 1)) == PLUS
5723 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5724 {
5725 rtx addend = XEXP (XEXP (x, 1), 1);
5726
5727 /* Don't allow ldrd post-increment by register because it's hard
5728 to fix up invalid register choices. */
5729 if (use_ldrd
5730 && GET_CODE (x) == POST_MODIFY
5731 && REG_P (addend))
5732 return 0;
5733
5734 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5735 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5736 }
5737
5738 /* After reload, constants split into minipools will have addresses
5739 from a LABEL_REF. */
5740 else if (reload_completed
5741 && (code == LABEL_REF
5742 || (code == CONST
5743 && GET_CODE (XEXP (x, 0)) == PLUS
5744 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5745 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
5746 return 1;
5747
5748 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5749 return 0;
5750
5751 else if (code == PLUS)
5752 {
5753 rtx xop0 = XEXP (x, 0);
5754 rtx xop1 = XEXP (x, 1);
5755
5756 return ((arm_address_register_rtx_p (xop0, strict_p)
5757 && ((CONST_INT_P (xop1)
5758 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5759 || (!strict_p && will_be_in_index_register (xop1))))
5760 || (arm_address_register_rtx_p (xop1, strict_p)
5761 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5762 }
5763
5764 #if 0
5765 /* Reload currently can't handle MINUS, so disable this for now */
5766 else if (GET_CODE (x) == MINUS)
5767 {
5768 rtx xop0 = XEXP (x, 0);
5769 rtx xop1 = XEXP (x, 1);
5770
5771 return (arm_address_register_rtx_p (xop0, strict_p)
5772 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5773 }
5774 #endif
5775
5776 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5777 && code == SYMBOL_REF
5778 && CONSTANT_POOL_ADDRESS_P (x)
5779 && ! (flag_pic
5780 && symbol_mentioned_p (get_pool_constant (x))
5781 && ! pcrel_constant_p (get_pool_constant (x))))
5782 return 1;
5783
5784 return 0;
5785 }
5786
5787 /* Return nonzero if X is a valid Thumb-2 address operand. */
5788 static int
5789 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5790 {
5791 bool use_ldrd;
5792 enum rtx_code code = GET_CODE (x);
5793
5794 if (arm_address_register_rtx_p (x, strict_p))
5795 return 1;
5796
5797 use_ldrd = (TARGET_LDRD
5798 && (mode == DImode
5799 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5800
5801 if (code == POST_INC || code == PRE_DEC
5802 || ((code == PRE_INC || code == POST_DEC)
5803 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5804 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5805
5806 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5807 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5808 && GET_CODE (XEXP (x, 1)) == PLUS
5809 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5810 {
5811 /* Thumb-2 only has autoincrement by constant. */
5812 rtx addend = XEXP (XEXP (x, 1), 1);
5813 HOST_WIDE_INT offset;
5814
5815 if (!CONST_INT_P (addend))
5816 return 0;
5817
5818 offset = INTVAL(addend);
5819 if (GET_MODE_SIZE (mode) <= 4)
5820 return (offset > -256 && offset < 256);
5821
5822 return (use_ldrd && offset > -1024 && offset < 1024
5823 && (offset & 3) == 0);
5824 }
5825
5826 /* After reload, constants split into minipools will have addresses
5827 from a LABEL_REF. */
5828 else if (reload_completed
5829 && (code == LABEL_REF
5830 || (code == CONST
5831 && GET_CODE (XEXP (x, 0)) == PLUS
5832 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5833 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
5834 return 1;
5835
5836 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5837 return 0;
5838
5839 else if (code == PLUS)
5840 {
5841 rtx xop0 = XEXP (x, 0);
5842 rtx xop1 = XEXP (x, 1);
5843
5844 return ((arm_address_register_rtx_p (xop0, strict_p)
5845 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5846 || (!strict_p && will_be_in_index_register (xop1))))
5847 || (arm_address_register_rtx_p (xop1, strict_p)
5848 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5849 }
5850
5851 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5852 && code == SYMBOL_REF
5853 && CONSTANT_POOL_ADDRESS_P (x)
5854 && ! (flag_pic
5855 && symbol_mentioned_p (get_pool_constant (x))
5856 && ! pcrel_constant_p (get_pool_constant (x))))
5857 return 1;
5858
5859 return 0;
5860 }
5861
5862 /* Return nonzero if INDEX is valid for an address index operand in
5863 ARM state. */
5864 static int
5865 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5866 int strict_p)
5867 {
5868 HOST_WIDE_INT range;
5869 enum rtx_code code = GET_CODE (index);
5870
5871 /* Standard coprocessor addressing modes. */
5872 if (TARGET_HARD_FLOAT
5873 && TARGET_VFP
5874 && (mode == SFmode || mode == DFmode))
5875 return (code == CONST_INT && INTVAL (index) < 1024
5876 && INTVAL (index) > -1024
5877 && (INTVAL (index) & 3) == 0);
5878
5879 /* For quad modes, we restrict the constant offset to be slightly less
5880 than what the instruction format permits. We do this because for
5881 quad mode moves, we will actually decompose them into two separate
5882 double-mode reads or writes. INDEX must therefore be a valid
5883 (double-mode) offset and so should INDEX+8. */
5884 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5885 return (code == CONST_INT
5886 && INTVAL (index) < 1016
5887 && INTVAL (index) > -1024
5888 && (INTVAL (index) & 3) == 0);
5889
5890 /* We have no such constraint on double mode offsets, so we permit the
5891 full range of the instruction format. */
5892 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5893 return (code == CONST_INT
5894 && INTVAL (index) < 1024
5895 && INTVAL (index) > -1024
5896 && (INTVAL (index) & 3) == 0);
5897
5898 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5899 return (code == CONST_INT
5900 && INTVAL (index) < 1024
5901 && INTVAL (index) > -1024
5902 && (INTVAL (index) & 3) == 0);
5903
5904 if (arm_address_register_rtx_p (index, strict_p)
5905 && (GET_MODE_SIZE (mode) <= 4))
5906 return 1;
5907
5908 if (mode == DImode || mode == DFmode)
5909 {
5910 if (code == CONST_INT)
5911 {
5912 HOST_WIDE_INT val = INTVAL (index);
5913
5914 if (TARGET_LDRD)
5915 return val > -256 && val < 256;
5916 else
5917 return val > -4096 && val < 4092;
5918 }
5919
5920 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5921 }
5922
5923 if (GET_MODE_SIZE (mode) <= 4
5924 && ! (arm_arch4
5925 && (mode == HImode
5926 || mode == HFmode
5927 || (mode == QImode && outer == SIGN_EXTEND))))
5928 {
5929 if (code == MULT)
5930 {
5931 rtx xiop0 = XEXP (index, 0);
5932 rtx xiop1 = XEXP (index, 1);
5933
5934 return ((arm_address_register_rtx_p (xiop0, strict_p)
5935 && power_of_two_operand (xiop1, SImode))
5936 || (arm_address_register_rtx_p (xiop1, strict_p)
5937 && power_of_two_operand (xiop0, SImode)));
5938 }
5939 else if (code == LSHIFTRT || code == ASHIFTRT
5940 || code == ASHIFT || code == ROTATERT)
5941 {
5942 rtx op = XEXP (index, 1);
5943
5944 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5945 && CONST_INT_P (op)
5946 && INTVAL (op) > 0
5947 && INTVAL (op) <= 31);
5948 }
5949 }
5950
5951 /* For ARM v4 we may be doing a sign-extend operation during the
5952 load. */
5953 if (arm_arch4)
5954 {
5955 if (mode == HImode
5956 || mode == HFmode
5957 || (outer == SIGN_EXTEND && mode == QImode))
5958 range = 256;
5959 else
5960 range = 4096;
5961 }
5962 else
5963 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5964
5965 return (code == CONST_INT
5966 && INTVAL (index) < range
5967 && INTVAL (index) > -range);
5968 }
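
/* A minimal sketch of the ranges above, assuming a plain ARMv5TE-class
   target (arm_arch4 set, TARGET_LDRD available, no VFP/Neon/iWMMXt) and
   non-strict checking; kept under #if 0 since it is purely illustrative.  */
#if 0
static void
arm_legitimate_index_p_examples (void)  /* Hypothetical, never compiled.  */
{
  arm_legitimate_index_p (SImode, GEN_INT (4095), SET, 0); /* 1: 12-bit word offset.  */
  arm_legitimate_index_p (SImode, GEN_INT (4096), SET, 0); /* 0: out of range.  */
  arm_legitimate_index_p (HImode, GEN_INT (255), SET, 0);  /* 1: 8-bit ldrh offset.  */
  arm_legitimate_index_p (HImode, GEN_INT (256), SET, 0);  /* 0: out of range.  */
  arm_legitimate_index_p (DImode, GEN_INT (255), SET, 0);  /* 1: 8-bit ldrd offset.  */
}
#endif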
5969
5970 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
5971 index operand, i.e. 1, 2, 4 or 8. */
5972 static bool
5973 thumb2_index_mul_operand (rtx op)
5974 {
5975 HOST_WIDE_INT val;
5976
5977 if (!CONST_INT_P (op))
5978 return false;
5979
5980 val = INTVAL(op);
5981 return (val == 1 || val == 2 || val == 4 || val == 8);
5982 }
5983
5984 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5985 static int
5986 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5987 {
5988 enum rtx_code code = GET_CODE (index);
5989
5990 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5991 /* Standard coprocessor addressing modes. */
5992 if (TARGET_HARD_FLOAT
5993 && TARGET_VFP
5994 && (mode == SFmode || mode == DFmode))
5995 return (code == CONST_INT && INTVAL (index) < 1024
5996 /* Thumb-2 allows only a > -256 index range for its core register
5997 load/stores. Since we allow SF/DF in core registers, we have
5998 to use the intersection between -256~4096 (core) and -1024~1024
5999 (coprocessor). */
6000 && INTVAL (index) > -256
6001 && (INTVAL (index) & 3) == 0);
6002
6003 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6004 {
6005 /* For DImode assume values will usually live in core regs
6006 and only allow LDRD addressing modes. */
6007 if (!TARGET_LDRD || mode != DImode)
6008 return (code == CONST_INT
6009 && INTVAL (index) < 1024
6010 && INTVAL (index) > -1024
6011 && (INTVAL (index) & 3) == 0);
6012 }
6013
6014 /* For quad modes, we restrict the constant offset to be slightly less
6015 than what the instruction format permits. We do this because for
6016 quad mode moves, we will actually decompose them into two separate
6017 double-mode reads or writes. INDEX must therefore be a valid
6018 (double-mode) offset and so should INDEX+8. */
6019 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6020 return (code == CONST_INT
6021 && INTVAL (index) < 1016
6022 && INTVAL (index) > -1024
6023 && (INTVAL (index) & 3) == 0);
6024
6025 /* We have no such constraint on double mode offsets, so we permit the
6026 full range of the instruction format. */
6027 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6028 return (code == CONST_INT
6029 && INTVAL (index) < 1024
6030 && INTVAL (index) > -1024
6031 && (INTVAL (index) & 3) == 0);
6032
6033 if (arm_address_register_rtx_p (index, strict_p)
6034 && (GET_MODE_SIZE (mode) <= 4))
6035 return 1;
6036
6037 if (mode == DImode || mode == DFmode)
6038 {
6039 if (code == CONST_INT)
6040 {
6041 HOST_WIDE_INT val = INTVAL (index);
6042 /* ??? Can we assume ldrd for thumb2? */
6043 /* Thumb-2 ldrd only has reg+const addressing modes. */
6044 /* ldrd supports offsets of +-1020.
6045 However the ldr fallback does not. */
6046 return val > -256 && val < 256 && (val & 3) == 0;
6047 }
6048 else
6049 return 0;
6050 }
6051
6052 if (code == MULT)
6053 {
6054 rtx xiop0 = XEXP (index, 0);
6055 rtx xiop1 = XEXP (index, 1);
6056
6057 return ((arm_address_register_rtx_p (xiop0, strict_p)
6058 && thumb2_index_mul_operand (xiop1))
6059 || (arm_address_register_rtx_p (xiop1, strict_p)
6060 && thumb2_index_mul_operand (xiop0)));
6061 }
6062 else if (code == ASHIFT)
6063 {
6064 rtx op = XEXP (index, 1);
6065
6066 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6067 && CONST_INT_P (op)
6068 && INTVAL (op) > 0
6069 && INTVAL (op) <= 3);
6070 }
6071
6072 return (code == CONST_INT
6073 && INTVAL (index) < 4096
6074 && INTVAL (index) > -256);
6075 }
6076
6077 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6078 static int
6079 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6080 {
6081 int regno;
6082
6083 if (!REG_P (x))
6084 return 0;
6085
6086 regno = REGNO (x);
6087
6088 if (strict_p)
6089 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6090
6091 return (regno <= LAST_LO_REGNUM
6092 || regno > LAST_VIRTUAL_REGISTER
6093 || regno == FRAME_POINTER_REGNUM
6094 || (GET_MODE_SIZE (mode) >= 4
6095 && (regno == STACK_POINTER_REGNUM
6096 || regno >= FIRST_PSEUDO_REGISTER
6097 || x == hard_frame_pointer_rtx
6098 || x == arg_pointer_rtx)));
6099 }
6100
6101 /* Return nonzero if x is a legitimate index register. This is the case
6102 for any base register that can access a QImode object. */
6103 inline static int
6104 thumb1_index_register_rtx_p (rtx x, int strict_p)
6105 {
6106 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6107 }
6108
6109 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6110
6111 The AP may be eliminated to either the SP or the FP, so we use the
6112 least common denominator, e.g. SImode, and offsets from 0 to 64.
6113
6114 ??? Verify whether the above is the right approach.
6115
6116 ??? Also, the FP may be eliminated to the SP, so perhaps that
6117 needs special handling also.
6118
6119 ??? Look at how the mips16 port solves this problem. It probably uses
6120 better ways to solve some of these problems.
6121
6122 Although it is not incorrect, we don't accept QImode and HImode
6123 addresses based on the frame pointer or arg pointer until the
6124 reload pass starts. This is so that eliminating such addresses
6125 into stack based ones won't produce impossible code. */
6126 int
6127 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6128 {
6129 /* ??? Not clear if this is right. Experiment. */
6130 if (GET_MODE_SIZE (mode) < 4
6131 && !(reload_in_progress || reload_completed)
6132 && (reg_mentioned_p (frame_pointer_rtx, x)
6133 || reg_mentioned_p (arg_pointer_rtx, x)
6134 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6135 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6136 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6137 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6138 return 0;
6139
6140 /* Accept any base register. SP only in SImode or larger. */
6141 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6142 return 1;
6143
6144 /* This is PC relative data before arm_reorg runs. */
6145 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6146 && GET_CODE (x) == SYMBOL_REF
6147 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6148 return 1;
6149
6150 /* This is PC relative data after arm_reorg runs. */
6151 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6152 && reload_completed
6153 && (GET_CODE (x) == LABEL_REF
6154 || (GET_CODE (x) == CONST
6155 && GET_CODE (XEXP (x, 0)) == PLUS
6156 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6157 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6158 return 1;
6159
6160 /* Post-inc indexing only supported for SImode and larger. */
6161 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6162 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6163 return 1;
6164
6165 else if (GET_CODE (x) == PLUS)
6166 {
6167 /* REG+REG address can be any two index registers. */
6168 /* We disallow FRAME+REG addressing since we know that FRAME
6169 will be replaced with STACK, and SP relative addressing only
6170 permits SP+OFFSET. */
6171 if (GET_MODE_SIZE (mode) <= 4
6172 && XEXP (x, 0) != frame_pointer_rtx
6173 && XEXP (x, 1) != frame_pointer_rtx
6174 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6175 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6176 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6177 return 1;
6178
6179 /* REG+const has 5-7 bit offset for non-SP registers. */
6180 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6181 || XEXP (x, 0) == arg_pointer_rtx)
6182 && CONST_INT_P (XEXP (x, 1))
6183 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6184 return 1;
6185
6186 /* REG+const has 10-bit offset for SP, but only SImode and
6187 larger is supported. */
6188 /* ??? Should probably check for DI/DFmode overflow here
6189 just like GO_IF_LEGITIMATE_OFFSET does. */
6190 else if (REG_P (XEXP (x, 0))
6191 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6192 && GET_MODE_SIZE (mode) >= 4
6193 && CONST_INT_P (XEXP (x, 1))
6194 && INTVAL (XEXP (x, 1)) >= 0
6195 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6196 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6197 return 1;
6198
6199 else if (REG_P (XEXP (x, 0))
6200 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6201 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6202 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6203 && REGNO (XEXP (x, 0))
6204 <= LAST_VIRTUAL_POINTER_REGISTER))
6205 && GET_MODE_SIZE (mode) >= 4
6206 && CONST_INT_P (XEXP (x, 1))
6207 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6208 return 1;
6209 }
6210
6211 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6212 && GET_MODE_SIZE (mode) == 4
6213 && GET_CODE (x) == SYMBOL_REF
6214 && CONSTANT_POOL_ADDRESS_P (x)
6215 && ! (flag_pic
6216 && symbol_mentioned_p (get_pool_constant (x))
6217 && ! pcrel_constant_p (get_pool_constant (x))))
6218 return 1;
6219
6220 return 0;
6221 }
6222
6223 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6224 instruction of mode MODE. */
6225 int
6226 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6227 {
6228 switch (GET_MODE_SIZE (mode))
6229 {
6230 case 1:
6231 return val >= 0 && val < 32;
6232
6233 case 2:
6234 return val >= 0 && val < 64 && (val & 1) == 0;
6235
6236 default:
6237 return (val >= 0
6238 && (val + GET_MODE_SIZE (mode)) <= 128
6239 && (val & 3) == 0);
6240 }
6241 }
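
/* A minimal sketch of the resulting ranges, assuming the usual 1, 2, 4 and
   8 byte sizes for QImode, HImode, SImode and DImode; kept under #if 0
   since it is purely illustrative.  */
#if 0
static void
thumb_legitimate_offset_p_examples (void)  /* Hypothetical, never compiled.  */
{
  thumb_legitimate_offset_p (QImode, 31);  /* 1: byte loads allow 0..31.  */
  thumb_legitimate_offset_p (HImode, 62);  /* 1: halfwords allow 0..62, even.  */
  thumb_legitimate_offset_p (HImode, 63);  /* 0: odd offset.  */
  thumb_legitimate_offset_p (SImode, 124); /* 1: words allow 0..124, word-aligned.  */
  thumb_legitimate_offset_p (DImode, 124); /* 0: 124 + 8 exceeds 128.  */
}
#endif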
6242
6243 bool
6244 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6245 {
6246 if (TARGET_ARM)
6247 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6248 else if (TARGET_THUMB2)
6249 return thumb2_legitimate_address_p (mode, x, strict_p);
6250 else /* if (TARGET_THUMB1) */
6251 return thumb1_legitimate_address_p (mode, x, strict_p);
6252 }
6253
6254 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
6255
6256 Given an rtx X being reloaded into a reg required to be
6257 in class CLASS, return the class of reg to actually use.
6258 In general this is just CLASS, but for the Thumb core registers and
6259 immediate constants we prefer a LO_REGS class or a subset. */
6260
6261 static reg_class_t
6262 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
6263 {
6264 if (TARGET_32BIT)
6265 return rclass;
6266 else
6267 {
6268 if (rclass == GENERAL_REGS
6269 || rclass == HI_REGS
6270 || rclass == NO_REGS
6271 || rclass == STACK_REG)
6272 return LO_REGS;
6273 else
6274 return rclass;
6275 }
6276 }
6277
6278 /* Build the SYMBOL_REF for __tls_get_addr. */
6279
6280 static GTY(()) rtx tls_get_addr_libfunc;
6281
6282 static rtx
6283 get_tls_get_addr (void)
6284 {
6285 if (!tls_get_addr_libfunc)
6286 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6287 return tls_get_addr_libfunc;
6288 }
6289
6290 rtx
6291 arm_load_tp (rtx target)
6292 {
6293 if (!target)
6294 target = gen_reg_rtx (SImode);
6295
6296 if (TARGET_HARD_TP)
6297 {
6298 /* Can return in any reg. */
6299 emit_insn (gen_load_tp_hard (target));
6300 }
6301 else
6302 {
6303 /* Always returned in r0. Immediately copy the result into a pseudo,
6304 otherwise other uses of r0 (e.g. setting up function arguments) may
6305 clobber the value. */
6306
6307 rtx tmp;
6308
6309 emit_insn (gen_load_tp_soft ());
6310
6311 tmp = gen_rtx_REG (SImode, 0);
6312 emit_move_insn (target, tmp);
6313 }
6314 return target;
6315 }
6316
6317 static rtx
6318 load_tls_operand (rtx x, rtx reg)
6319 {
6320 rtx tmp;
6321
6322 if (reg == NULL_RTX)
6323 reg = gen_reg_rtx (SImode);
6324
6325 tmp = gen_rtx_CONST (SImode, x);
6326
6327 emit_move_insn (reg, tmp);
6328
6329 return reg;
6330 }
6331
6332 static rtx
6333 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6334 {
6335 rtx insns, label, labelno, sum;
6336
6337 gcc_assert (reloc != TLS_DESCSEQ);
6338 start_sequence ();
6339
6340 labelno = GEN_INT (pic_labelno++);
6341 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6342 label = gen_rtx_CONST (VOIDmode, label);
6343
6344 sum = gen_rtx_UNSPEC (Pmode,
6345 gen_rtvec (4, x, GEN_INT (reloc), label,
6346 GEN_INT (TARGET_ARM ? 8 : 4)),
6347 UNSPEC_TLS);
6348 reg = load_tls_operand (sum, reg);
6349
6350 if (TARGET_ARM)
6351 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6352 else
6353 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6354
6355 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6356 LCT_PURE, /* LCT_CONST? */
6357 Pmode, 1, reg, Pmode);
6358
6359 insns = get_insns ();
6360 end_sequence ();
6361
6362 return insns;
6363 }
6364
6365 static rtx
6366 arm_tls_descseq_addr (rtx x, rtx reg)
6367 {
6368 rtx labelno = GEN_INT (pic_labelno++);
6369 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6370 rtx sum = gen_rtx_UNSPEC (Pmode,
6371 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6372 gen_rtx_CONST (VOIDmode, label),
6373 GEN_INT (!TARGET_ARM)),
6374 UNSPEC_TLS);
6375 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6376
6377 emit_insn (gen_tlscall (x, labelno));
6378 if (!reg)
6379 reg = gen_reg_rtx (SImode);
6380 else
6381 gcc_assert (REGNO (reg) != 0);
6382
6383 emit_move_insn (reg, reg0);
6384
6385 return reg;
6386 }
6387
6388 rtx
6389 legitimize_tls_address (rtx x, rtx reg)
6390 {
6391 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6392 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6393
6394 switch (model)
6395 {
6396 case TLS_MODEL_GLOBAL_DYNAMIC:
6397 if (TARGET_GNU2_TLS)
6398 {
6399 reg = arm_tls_descseq_addr (x, reg);
6400
6401 tp = arm_load_tp (NULL_RTX);
6402
6403 dest = gen_rtx_PLUS (Pmode, tp, reg);
6404 }
6405 else
6406 {
6407 /* Original scheme */
6408 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6409 dest = gen_reg_rtx (Pmode);
6410 emit_libcall_block (insns, dest, ret, x);
6411 }
6412 return dest;
6413
6414 case TLS_MODEL_LOCAL_DYNAMIC:
6415 if (TARGET_GNU2_TLS)
6416 {
6417 reg = arm_tls_descseq_addr (x, reg);
6418
6419 tp = arm_load_tp (NULL_RTX);
6420
6421 dest = gen_rtx_PLUS (Pmode, tp, reg);
6422 }
6423 else
6424 {
6425 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6426
6427 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6428 share the LDM result with other LD model accesses. */
6429 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6430 UNSPEC_TLS);
6431 dest = gen_reg_rtx (Pmode);
6432 emit_libcall_block (insns, dest, ret, eqv);
6433
6434 /* Load the addend. */
6435 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
6436 GEN_INT (TLS_LDO32)),
6437 UNSPEC_TLS);
6438 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6439 dest = gen_rtx_PLUS (Pmode, dest, addend);
6440 }
6441 return dest;
6442
6443 case TLS_MODEL_INITIAL_EXEC:
6444 labelno = GEN_INT (pic_labelno++);
6445 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6446 label = gen_rtx_CONST (VOIDmode, label);
6447 sum = gen_rtx_UNSPEC (Pmode,
6448 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6449 GEN_INT (TARGET_ARM ? 8 : 4)),
6450 UNSPEC_TLS);
6451 reg = load_tls_operand (sum, reg);
6452
6453 if (TARGET_ARM)
6454 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6455 else if (TARGET_THUMB2)
6456 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6457 else
6458 {
6459 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6460 emit_move_insn (reg, gen_const_mem (SImode, reg));
6461 }
6462
6463 tp = arm_load_tp (NULL_RTX);
6464
6465 return gen_rtx_PLUS (Pmode, tp, reg);
6466
6467 case TLS_MODEL_LOCAL_EXEC:
6468 tp = arm_load_tp (NULL_RTX);
6469
6470 reg = gen_rtx_UNSPEC (Pmode,
6471 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6472 UNSPEC_TLS);
6473 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6474
6475 return gen_rtx_PLUS (Pmode, tp, reg);
6476
6477 default:
6478 abort ();
6479 }
6480 }
6481
6482 /* Try machine-dependent ways of modifying an illegitimate address
6483 to be legitimate. If we find one, return the new, valid address. */
6484 rtx
6485 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6486 {
6487 if (!TARGET_ARM)
6488 {
6489 /* TODO: legitimize_address for Thumb2. */
6490 if (TARGET_THUMB2)
6491 return x;
6492 return thumb_legitimize_address (x, orig_x, mode);
6493 }
6494
6495 if (arm_tls_symbol_p (x))
6496 return legitimize_tls_address (x, NULL_RTX);
6497
6498 if (GET_CODE (x) == PLUS)
6499 {
6500 rtx xop0 = XEXP (x, 0);
6501 rtx xop1 = XEXP (x, 1);
6502
6503 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6504 xop0 = force_reg (SImode, xop0);
6505
6506 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6507 xop1 = force_reg (SImode, xop1);
6508
6509 if (ARM_BASE_REGISTER_RTX_P (xop0)
6510 && CONST_INT_P (xop1))
6511 {
6512 HOST_WIDE_INT n, low_n;
6513 rtx base_reg, val;
6514 n = INTVAL (xop1);
6515
6516 /* VFP addressing modes actually allow greater offsets, but for
6517 now we just stick with the lowest common denominator. */
6518 if (mode == DImode
6519 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6520 {
6521 low_n = n & 0x0f;
6522 n &= ~0x0f;
6523 if (low_n > 4)
6524 {
6525 n += 16;
6526 low_n -= 16;
6527 }
6528 }
6529 else
6530 {
6531 low_n = ((mode) == TImode ? 0
6532 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6533 n -= low_n;
6534 }
6535
6536 base_reg = gen_reg_rtx (SImode);
6537 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
6538 emit_move_insn (base_reg, val);
6539 x = plus_constant (Pmode, base_reg, low_n);
6540 }
6541 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6542 x = gen_rtx_PLUS (SImode, xop0, xop1);
6543 }
6544
6545 /* XXX We don't allow MINUS any more -- see comment in
6546 arm_legitimate_address_outer_p (). */
6547 else if (GET_CODE (x) == MINUS)
6548 {
6549 rtx xop0 = XEXP (x, 0);
6550 rtx xop1 = XEXP (x, 1);
6551
6552 if (CONSTANT_P (xop0))
6553 xop0 = force_reg (SImode, xop0);
6554
6555 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6556 xop1 = force_reg (SImode, xop1);
6557
6558 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6559 x = gen_rtx_MINUS (SImode, xop0, xop1);
6560 }
6561
6562 /* Make sure to take full advantage of the pre-indexed addressing mode
6563 with absolute addresses, which often allows the base register to be
6564 factored out across multiple adjacent memory references, and might
6565 even allow the minipool to be avoided entirely. */
6566 else if (CONST_INT_P (x) && optimize > 0)
6567 {
6568 unsigned int bits;
6569 HOST_WIDE_INT mask, base, index;
6570 rtx base_reg;
6571
6572 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
6573 use an 8-bit index. So let's use a 12-bit index for SImode only and
6574 hope that arm_gen_constant will enable ldrb to use more bits. */
6575 bits = (mode == SImode) ? 12 : 8;
6576 mask = (1 << bits) - 1;
6577 base = INTVAL (x) & ~mask;
6578 index = INTVAL (x) & mask;
6579 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6580 {
6581 /* It'll most probably be more efficient to generate the base
6582 with more bits set and use a negative index instead. */
6583 base |= mask;
6584 index -= mask;
6585 }
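/* For instance (purely illustrative), a hypothetical x of 0xFFF004 in SImode
   gives base = 0xFFF000 and index = 4; twelve bits of the base are set, so
   the adjustment above turns this into base = 0xFFFFFF (a single MVN of
   0xFF000000) and index = -4091, still within the 12-bit ldr range.  */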
6586 base_reg = force_reg (SImode, GEN_INT (base));
6587 x = plus_constant (Pmode, base_reg, index);
6588 }
6589
6590 if (flag_pic)
6591 {
6592 /* We need to find and carefully transform any SYMBOL and LABEL
6593 references; so go back to the original address expression. */
6594 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6595
6596 if (new_x != orig_x)
6597 x = new_x;
6598 }
6599
6600 return x;
6601 }
6602
6603
6604 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6605 to be legitimate. If we find one, return the new, valid address. */
6606 rtx
6607 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6608 {
6609 if (arm_tls_symbol_p (x))
6610 return legitimize_tls_address (x, NULL_RTX);
6611
6612 if (GET_CODE (x) == PLUS
6613 && CONST_INT_P (XEXP (x, 1))
6614 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6615 || INTVAL (XEXP (x, 1)) < 0))
6616 {
6617 rtx xop0 = XEXP (x, 0);
6618 rtx xop1 = XEXP (x, 1);
6619 HOST_WIDE_INT offset = INTVAL (xop1);
6620
6621 /* Try to fold the offset into a biasing of the base register and
6622 then offsetting that. Don't do this when optimizing for space
6623 since it can cause too many CSEs. */
6624 if (optimize_size && offset >= 0
6625 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6626 {
6627 HOST_WIDE_INT delta;
6628
6629 if (offset >= 256)
6630 delta = offset - (256 - GET_MODE_SIZE (mode));
6631 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6632 delta = 31 * GET_MODE_SIZE (mode);
6633 else
6634 delta = offset & (~31 * GET_MODE_SIZE (mode));
6635
6636 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
6637 NULL_RTX);
6638 x = plus_constant (Pmode, xop0, delta);
6639 }
6640 else if (offset < 0 && offset > -256)
6641 /* Small negative offsets are best done with a subtract before the
6642 dereference; forcing these into a register normally takes two
6643 instructions. */
6644 x = force_operand (x, NULL_RTX);
6645 else
6646 {
6647 /* For the remaining cases, force the constant into a register. */
6648 xop1 = force_reg (SImode, xop1);
6649 x = gen_rtx_PLUS (SImode, xop0, xop1);
6650 }
6651 }
6652 else if (GET_CODE (x) == PLUS
6653 && s_register_operand (XEXP (x, 1), SImode)
6654 && !s_register_operand (XEXP (x, 0), SImode))
6655 {
6656 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6657
6658 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6659 }
6660
6661 if (flag_pic)
6662 {
6663 /* We need to find and carefully transform any SYMBOL and LABEL
6664 references; so go back to the original address expression. */
6665 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6666
6667 if (new_x != orig_x)
6668 x = new_x;
6669 }
6670
6671 return x;
6672 }
6673
6674 bool
6675 arm_legitimize_reload_address (rtx *p,
6676 enum machine_mode mode,
6677 int opnum, int type,
6678 int ind_levels ATTRIBUTE_UNUSED)
6679 {
6680 /* We must recognize output that we have already generated ourselves. */
6681 if (GET_CODE (*p) == PLUS
6682 && GET_CODE (XEXP (*p, 0)) == PLUS
6683 && REG_P (XEXP (XEXP (*p, 0), 0))
6684 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
6685 && CONST_INT_P (XEXP (*p, 1)))
6686 {
6687 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6688 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6689 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6690 return true;
6691 }
6692
6693 if (GET_CODE (*p) == PLUS
6694 && REG_P (XEXP (*p, 0))
6695 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6696 /* If the base register is equivalent to a constant, let the generic
6697 code handle it. Otherwise we will run into problems if a future
6698 reload pass decides to rematerialize the constant. */
6699 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
6700 && CONST_INT_P (XEXP (*p, 1)))
6701 {
6702 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6703 HOST_WIDE_INT low, high;
6704
6705 /* Detect coprocessor load/stores. */
6706 bool coproc_p = ((TARGET_HARD_FLOAT
6707 && TARGET_VFP
6708 && (mode == SFmode || mode == DFmode))
6709 || (TARGET_REALLY_IWMMXT
6710 && VALID_IWMMXT_REG_MODE (mode))
6711 || (TARGET_NEON
6712 && (VALID_NEON_DREG_MODE (mode)
6713 || VALID_NEON_QREG_MODE (mode))));
6714
6715 /* For some conditions, bail out when the lower two bits of the offset are nonzero. */
6716 if ((val & 0x3) != 0
6717 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6718 && (coproc_p
6719 /* For DI, and DF under soft-float: */
6720 || ((mode == DImode || mode == DFmode)
6721 /* Without ldrd, we use stm/ldm, which does not
6722 fare well with unaligned bits. */
6723 && (! TARGET_LDRD
6724 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6725 || TARGET_THUMB2))))
6726 return false;
6727
6728 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6729 where the (reg+high) part gets turned into a reload add insn,
6730 we try to decompose the index into high/low values that can often
6731 also lead to better reload CSE.
6732 For example:
6733 ldr r0, [r2, #4100] // Offset too large
6734 ldr r1, [r2, #4104] // Offset too large
6735
6736 is best reloaded as:
6737 add t1, r2, #4096
6738 ldr r0, [t1, #4]
6739 add t2, r2, #4096
6740 ldr r1, [t2, #8]
6741
6742 which post-reload CSE can simplify in most cases to eliminate the
6743 second add instruction:
6744 add t1, r2, #4096
6745 ldr r0, [t1, #4]
6746 ldr r1, [t1, #8]
6747
6748 The idea here is that we want to split out the bits of the constant
6749 as a mask, rather than as subtracting the maximum offset that the
6750 respective type of load/store used can handle.
6751
6752 A negative low part can still be utilized even when the overall
6753 offset is positive; sometimes this may lead to an immediate
6754 that can be constructed with fewer instructions.
6755 For example:
6756 ldr r0, [r2, #0x3FFFFC]
6757
6758 This is best reloaded as:
6759 add t1, r2, #0x400000
6760 ldr r0, [t1, #-4]
6761
6762 The trick for spotting this for a load insn with N bits of offset
6763 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6764 negative offset that is going to make bit N and all the bits below
6765 it become zero in the remainder part.
6766
6767 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6768 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6769 used in most cases of ARM load/store instructions. */
6770
6771 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6772 (((VAL) & ((1 << (N)) - 1)) \
6773 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6774 : 0)
6775
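/* A worked instance of the macro, using the 0x3FFFFC example from the
   comment above with N = 12: the low 12 bits are nonzero and bit 12 is set,
   so the result is ((0x3FFFFC & 0x1FFF) ^ 0x1000) - 0x1000
   = 0x0FFC - 0x1000 = -4, and the remaining high part becomes 0x400000.  */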
6776 if (coproc_p)
6777 {
6778 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6779
6780 /* NEON quad-word load/stores are made of two double-word accesses,
6781 so the valid index range is reduced by 8. Treat as 9-bit range if
6782 we go over it. */
6783 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6784 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6785 }
6786 else if (GET_MODE_SIZE (mode) == 8)
6787 {
6788 if (TARGET_LDRD)
6789 low = (TARGET_THUMB2
6790 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6791 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6792 else
6793 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6794 to access doublewords. The supported load/store offsets are
6795 -8, -4, and 4, which we try to produce here. */
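/* For instance (purely illustrative), a hypothetical val of 0x10C has
   low nibble 0xC, so low becomes (0xC ^ 0x8) - 0x8 = -4 and the high
   part computed below is 0x110.  */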
6796 low = ((val & 0xf) ^ 0x8) - 0x8;
6797 }
6798 else if (GET_MODE_SIZE (mode) < 8)
6799 {
6800 /* NEON element load/stores do not have an offset. */
6801 if (TARGET_NEON_FP16 && mode == HFmode)
6802 return false;
6803
6804 if (TARGET_THUMB2)
6805 {
6806 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6807 Try the wider 12-bit range first, and re-try if the result
6808 is out of range. */
6809 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6810 if (low < -255)
6811 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6812 }
6813 else
6814 {
6815 if (mode == HImode || mode == HFmode)
6816 {
6817 if (arm_arch4)
6818 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6819 else
6820 {
6821 /* The storehi/movhi_bytes fallbacks can use only
6822 [-4094,+4094] of the full ldrb/strb index range. */
6823 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6824 if (low == 4095 || low == -4095)
6825 return false;
6826 }
6827 }
6828 else
6829 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6830 }
6831 }
6832 else
6833 return false;
6834
6835 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6836 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6837 - (unsigned HOST_WIDE_INT) 0x80000000);
6838 /* Check for overflow or zero */
6839 if (low == 0 || high == 0 || (high + low != val))
6840 return false;
6841
6842 /* Reload the high part into a base reg; leave the low part
6843 in the mem. */
6844 *p = gen_rtx_PLUS (GET_MODE (*p),
6845 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6846 GEN_INT (high)),
6847 GEN_INT (low));
6848 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6849 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6850 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6851 return true;
6852 }
6853
6854 return false;
6855 }
6856
6857 rtx
6858 thumb_legitimize_reload_address (rtx *x_p,
6859 enum machine_mode mode,
6860 int opnum, int type,
6861 int ind_levels ATTRIBUTE_UNUSED)
6862 {
6863 rtx x = *x_p;
6864
6865 if (GET_CODE (x) == PLUS
6866 && GET_MODE_SIZE (mode) < 4
6867 && REG_P (XEXP (x, 0))
6868 && XEXP (x, 0) == stack_pointer_rtx
6869 && CONST_INT_P (XEXP (x, 1))
6870 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6871 {
6872 rtx orig_x = x;
6873
6874 x = copy_rtx (x);
6875 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6876 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6877 return x;
6878 }
6879
6880 /* If both registers are hi-regs, then it's better to reload the
6881 entire expression rather than each register individually. That
6882 only requires one reload register rather than two. */
6883 if (GET_CODE (x) == PLUS
6884 && REG_P (XEXP (x, 0))
6885 && REG_P (XEXP (x, 1))
6886 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6887 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6888 {
6889 rtx orig_x = x;
6890
6891 x = copy_rtx (x);
6892 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6893 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6894 return x;
6895 }
6896
6897 return NULL;
6898 }
6899
6900 /* Test for various thread-local symbols. */
6901
6902 /* Return TRUE if X is a thread-local symbol. */
6903
6904 static bool
6905 arm_tls_symbol_p (rtx x)
6906 {
6907 if (! TARGET_HAVE_TLS)
6908 return false;
6909
6910 if (GET_CODE (x) != SYMBOL_REF)
6911 return false;
6912
6913 return SYMBOL_REF_TLS_MODEL (x) != 0;
6914 }
6915
6916 /* Helper for arm_tls_referenced_p. */
6917
6918 static int
6919 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6920 {
6921 if (GET_CODE (*x) == SYMBOL_REF)
6922 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6923
6924 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6925 TLS offsets, not real symbol references. */
6926 if (GET_CODE (*x) == UNSPEC
6927 && XINT (*x, 1) == UNSPEC_TLS)
6928 return -1;
6929
6930 return 0;
6931 }
6932
6933 /* Return TRUE if X contains any TLS symbol references. */
6934
6935 bool
6936 arm_tls_referenced_p (rtx x)
6937 {
6938 if (! TARGET_HAVE_TLS)
6939 return false;
6940
6941 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6942 }
6943
6944 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6945
6946 On the ARM, allow any integer (invalid ones are removed later by insn
6947 patterns), nice doubles and symbol_refs which refer to the function's
6948 constant pool XXX.
6949
6950 When generating pic allow anything. */
6951
6952 static bool
6953 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
6954 {
6955 /* At present, we have no support for Neon structure constants, so forbid
6956 them here. It might be possible to handle simple cases like 0 and -1
6957 in future. */
6958 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
6959 return false;
6960
6961 return flag_pic || !label_mentioned_p (x);
6962 }
6963
6964 static bool
6965 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6966 {
6967 return (CONST_INT_P (x)
6968 || CONST_DOUBLE_P (x)
6969 || CONSTANT_ADDRESS_P (x)
6970 || flag_pic);
6971 }
6972
6973 static bool
6974 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
6975 {
6976 return (!arm_cannot_force_const_mem (mode, x)
6977 && (TARGET_32BIT
6978 ? arm_legitimate_constant_p_1 (mode, x)
6979 : thumb_legitimate_constant_p (mode, x)));
6980 }
6981
6982 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6983
6984 static bool
6985 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6986 {
6987 rtx base, offset;
6988
6989 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6990 {
6991 split_const (x, &base, &offset);
6992 if (GET_CODE (base) == SYMBOL_REF
6993 && !offset_within_block_p (base, INTVAL (offset)))
6994 return true;
6995 }
6996 return arm_tls_referenced_p (x);
6997 }
6998 \f
6999 #define REG_OR_SUBREG_REG(X) \
7000 (REG_P (X) \
7001 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
7002
7003 #define REG_OR_SUBREG_RTX(X) \
7004 (REG_P (X) ? (X) : SUBREG_REG (X))
7005
7006 static inline int
7007 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7008 {
7009 enum machine_mode mode = GET_MODE (x);
7010 int total;
7011
7012 switch (code)
7013 {
7014 case ASHIFT:
7015 case ASHIFTRT:
7016 case LSHIFTRT:
7017 case ROTATERT:
7018 case PLUS:
7019 case MINUS:
7020 case COMPARE:
7021 case NEG:
7022 case NOT:
7023 return COSTS_N_INSNS (1);
7024
7025 case MULT:
7026 if (CONST_INT_P (XEXP (x, 1)))
7027 {
7028 int cycles = 0;
7029 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7030
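/* Roughly one cycle is charged per two bits of the multiplier; for a
   hypothetical constant of 100 (0x64) the loop below shifts it to zero
   in four steps, so the cost is COSTS_N_INSNS (2) + 4.  */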
7031 while (i)
7032 {
7033 i >>= 2;
7034 cycles++;
7035 }
7036 return COSTS_N_INSNS (2) + cycles;
7037 }
7038 return COSTS_N_INSNS (1) + 16;
7039
7040 case SET:
7041 return (COSTS_N_INSNS (1)
7042 + 4 * ((MEM_P (SET_SRC (x)))
7043 + MEM_P (SET_DEST (x))));
7044
7045 case CONST_INT:
7046 if (outer == SET)
7047 {
7048 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7049 return 0;
7050 if (thumb_shiftable_const (INTVAL (x)))
7051 return COSTS_N_INSNS (2);
7052 return COSTS_N_INSNS (3);
7053 }
7054 else if ((outer == PLUS || outer == COMPARE)
7055 && INTVAL (x) < 256 && INTVAL (x) > -256)
7056 return 0;
7057 else if ((outer == IOR || outer == XOR || outer == AND)
7058 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7059 return COSTS_N_INSNS (1);
7060 else if (outer == AND)
7061 {
7062 int i;
7063 /* This duplicates the tests in the andsi3 expander. */
7064 for (i = 9; i <= 31; i++)
7065 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7066 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7067 return COSTS_N_INSNS (2);
7068 }
7069 else if (outer == ASHIFT || outer == ASHIFTRT
7070 || outer == LSHIFTRT)
7071 return 0;
7072 return COSTS_N_INSNS (2);
7073
7074 case CONST:
7075 case CONST_DOUBLE:
7076 case LABEL_REF:
7077 case SYMBOL_REF:
7078 return COSTS_N_INSNS (3);
7079
7080 case UDIV:
7081 case UMOD:
7082 case DIV:
7083 case MOD:
7084 return 100;
7085
7086 case TRUNCATE:
7087 return 99;
7088
7089 case AND:
7090 case XOR:
7091 case IOR:
7092 /* XXX guess. */
7093 return 8;
7094
7095 case MEM:
7096 /* XXX another guess. */
7097 /* Memory costs quite a lot for the first word, but subsequent words
7098 load at the equivalent of a single insn each. */
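/* Worked example (assuming 4-byte words): a DImode load costs
   10 + 4 * ((8 - 1) / 4) = 14, plus a further 4 if the address is a
   constant-pool SYMBOL_REF.  */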
7099 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7100 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7101 ? 4 : 0));
7102
7103 case IF_THEN_ELSE:
7104 /* XXX a guess. */
7105 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7106 return 14;
7107 return 2;
7108
7109 case SIGN_EXTEND:
7110 case ZERO_EXTEND:
7111 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7112 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7113
7114 if (mode == SImode)
7115 return total;
7116
7117 if (arm_arch6)
7118 return total + COSTS_N_INSNS (1);
7119
7120 /* Assume a two-shift sequence. Increase the cost slightly so
7121 we prefer actual shifts over an extend operation. */
7122 return total + 1 + COSTS_N_INSNS (2);
7123
7124 default:
7125 return 99;
7126 }
7127 }
7128
7129 static inline bool
7130 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7131 {
7132 enum machine_mode mode = GET_MODE (x);
7133 enum rtx_code subcode;
7134 rtx operand;
7135 enum rtx_code code = GET_CODE (x);
7136 *total = 0;
7137
7138 switch (code)
7139 {
7140 case MEM:
7141 /* Memory costs quite a lot for the first word, but subsequent words
7142 load at the equivalent of a single insn each. */
7143 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7144 return true;
7145
7146 case DIV:
7147 case MOD:
7148 case UDIV:
7149 case UMOD:
7150 if (TARGET_HARD_FLOAT && mode == SFmode)
7151 *total = COSTS_N_INSNS (2);
7152 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7153 *total = COSTS_N_INSNS (4);
7154 else
7155 *total = COSTS_N_INSNS (20);
7156 return false;
7157
7158 case ROTATE:
7159 if (REG_P (XEXP (x, 1)))
7160 *total = COSTS_N_INSNS (1); /* Need to subtract from 32. */
7161 else if (!CONST_INT_P (XEXP (x, 1)))
7162 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
7163
7164 /* Fall through */
7165 case ROTATERT:
7166 if (mode != SImode)
7167 {
7168 *total += COSTS_N_INSNS (4);
7169 return true;
7170 }
7171
7172 /* Fall through */
7173 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
7174 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7175 if (mode == DImode)
7176 {
7177 *total += COSTS_N_INSNS (3);
7178 return true;
7179 }
7180
7181 *total += COSTS_N_INSNS (1);
7182 /* Increase the cost of complex shifts because they aren't any faster,
7183 and they reduce dual-issue opportunities. */
7184 if (arm_tune_cortex_a9
7185 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
7186 ++*total;
7187
7188 return true;
7189
7190 case MINUS:
7191 if (mode == DImode)
7192 {
7193 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7194 if (CONST_INT_P (XEXP (x, 0))
7195 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7196 {
7197 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7198 return true;
7199 }
7200
7201 if (CONST_INT_P (XEXP (x, 1))
7202 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7203 {
7204 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7205 return true;
7206 }
7207
7208 return false;
7209 }
7210
7211 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7212 {
7213 if (TARGET_HARD_FLOAT
7214 && (mode == SFmode
7215 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7216 {
7217 *total = COSTS_N_INSNS (1);
7218 if (CONST_DOUBLE_P (XEXP (x, 0))
7219 && arm_const_double_rtx (XEXP (x, 0)))
7220 {
7221 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7222 return true;
7223 }
7224
7225 if (CONST_DOUBLE_P (XEXP (x, 1))
7226 && arm_const_double_rtx (XEXP (x, 1)))
7227 {
7228 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7229 return true;
7230 }
7231
7232 return false;
7233 }
7234 *total = COSTS_N_INSNS (20);
7235 return false;
7236 }
7237
7238 *total = COSTS_N_INSNS (1);
7239 if (CONST_INT_P (XEXP (x, 0))
7240 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7241 {
7242 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7243 return true;
7244 }
7245
7246 subcode = GET_CODE (XEXP (x, 1));
7247 if (subcode == ASHIFT || subcode == ASHIFTRT
7248 || subcode == LSHIFTRT
7249 || subcode == ROTATE || subcode == ROTATERT)
7250 {
7251 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7252 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7253 return true;
7254 }
7255
7256 /* A shift as a part of RSB costs no more than RSB itself. */
7257 if (GET_CODE (XEXP (x, 0)) == MULT
7258 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7259 {
7260 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7261 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7262 return true;
7263 }
7264
7265 if (subcode == MULT
7266 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7267 {
7268 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7269 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7270 return true;
7271 }
7272
7273 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7274 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7275 {
7276 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7277 if (REG_P (XEXP (XEXP (x, 1), 0))
7278 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7279 *total += COSTS_N_INSNS (1);
7280
7281 return true;
7282 }
7283
7284 /* Fall through */
7285
7286 case PLUS:
7287 if (code == PLUS && arm_arch6 && mode == SImode
7288 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7289 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7290 {
7291 *total = COSTS_N_INSNS (1);
7292 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7293 0, speed);
7294 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7295 return true;
7296 }
7297
7298 /* MLA: All arguments must be registers. We filter out
7299 multiplication by a power of two, so that we fall through to
7300 the code below. */
7301 if (GET_CODE (XEXP (x, 0)) == MULT
7302 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7303 {
7304 /* The cost comes from the cost of the multiply. */
7305 return false;
7306 }
7307
7308 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7309 {
7310 if (TARGET_HARD_FLOAT
7311 && (mode == SFmode
7312 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7313 {
7314 *total = COSTS_N_INSNS (1);
7315 if (CONST_DOUBLE_P (XEXP (x, 1))
7316 && arm_const_double_rtx (XEXP (x, 1)))
7317 {
7318 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7319 return true;
7320 }
7321
7322 return false;
7323 }
7324
7325 *total = COSTS_N_INSNS (20);
7326 return false;
7327 }
7328
7329 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7330 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7331 {
7332 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7333 if (REG_P (XEXP (XEXP (x, 0), 0))
7334 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7335 *total += COSTS_N_INSNS (1);
7336 return true;
7337 }
7338
7339 /* Fall through */
7340
7341 case AND: case XOR: case IOR:
7342
7343 /* Normally the frame registers will be split into reg+const during
7344 reload, so it is a bad idea to combine them with other instructions,
7345 since then they might not be moved outside of loops. As a compromise
7346 we allow integration with ops that have a constant as their second
7347 operand. */
7348 if (REG_OR_SUBREG_REG (XEXP (x, 0))
7349 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7350 && !CONST_INT_P (XEXP (x, 1)))
7351 *total = COSTS_N_INSNS (1);
7352
7353 if (mode == DImode)
7354 {
7355 *total += COSTS_N_INSNS (2);
7356 if (CONST_INT_P (XEXP (x, 1))
7357 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7358 {
7359 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7360 return true;
7361 }
7362
7363 return false;
7364 }
7365
7366 *total += COSTS_N_INSNS (1);
7367 if (CONST_INT_P (XEXP (x, 1))
7368 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7369 {
7370 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7371 return true;
7372 }
7373 subcode = GET_CODE (XEXP (x, 0));
7374 if (subcode == ASHIFT || subcode == ASHIFTRT
7375 || subcode == LSHIFTRT
7376 || subcode == ROTATE || subcode == ROTATERT)
7377 {
7378 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7379 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7380 return true;
7381 }
7382
7383 if (subcode == MULT
7384 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7385 {
7386 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7387 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7388 return true;
7389 }
7390
7391 if (subcode == UMIN || subcode == UMAX
7392 || subcode == SMIN || subcode == SMAX)
7393 {
7394 *total = COSTS_N_INSNS (3);
7395 return true;
7396 }
7397
7398 return false;
7399
7400 case MULT:
7401 /* This should have been handled by the CPU specific routines. */
7402 gcc_unreachable ();
7403
7404 case TRUNCATE:
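/* The high part of a widening multiply, selected by the shift matched
   below, maps to a [su]mull-style instruction, so cost only the
   underlying multiply.  */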
7405 if (arm_arch3m && mode == SImode
7406 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7407 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7408 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7409 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7410 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7411 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7412 {
7413 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7414 return true;
7415 }
7416 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT. */
7417 return false;
7418
7419 case NEG:
7420 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7421 {
7422 if (TARGET_HARD_FLOAT
7423 && (mode == SFmode
7424 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7425 {
7426 *total = COSTS_N_INSNS (1);
7427 return false;
7428 }
7429 *total = COSTS_N_INSNS (2);
7430 return false;
7431 }
7432
7433 /* Fall through */
7434 case NOT:
7435 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7436 if (mode == SImode && code == NOT)
7437 {
7438 subcode = GET_CODE (XEXP (x, 0));
7439 if (subcode == ASHIFT || subcode == ASHIFTRT
7440 || subcode == LSHIFTRT
7441 || subcode == ROTATE || subcode == ROTATERT
7442 || (subcode == MULT
7443 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7444 {
7445 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7446 /* Register shifts cost an extra cycle. */
7447 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
7448 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7449 subcode, 1, speed);
7450 return true;
7451 }
7452 }
7453
7454 return false;
7455
7456 case IF_THEN_ELSE:
7457 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7458 {
7459 *total = COSTS_N_INSNS (4);
7460 return true;
7461 }
7462
7463 operand = XEXP (x, 0);
7464
7465 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7466 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7467 && REG_P (XEXP (operand, 0))
7468 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7469 *total += COSTS_N_INSNS (1);
7470 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
7471 + rtx_cost (XEXP (x, 2), code, 2, speed));
7472 return true;
7473
7474 case NE:
7475 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7476 {
7477 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7478 return true;
7479 }
7480 goto scc_insn;
7481
7482 case GE:
7483 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
7484 && mode == SImode && XEXP (x, 1) == const0_rtx)
7485 {
7486 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7487 return true;
7488 }
7489 goto scc_insn;
7490
7491 case LT:
7492 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
7493 && mode == SImode && XEXP (x, 1) == const0_rtx)
7494 {
7495 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7496 return true;
7497 }
7498 goto scc_insn;
7499
7500 case EQ:
7501 case GT:
7502 case LE:
7503 case GEU:
7504 case LTU:
7505 case GTU:
7506 case LEU:
7507 case UNORDERED:
7508 case ORDERED:
7509 case UNEQ:
7510 case UNGE:
7511 case UNLT:
7512 case UNGT:
7513 case UNLE:
7514 scc_insn:
7515 /* SCC insns. If the comparison has already been performed, they
7516 cost 2 instructions. Otherwise they need an additional comparison
7517 before them. */
7518 *total = COSTS_N_INSNS (2);
7519 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
7520 {
7521 return true;
7522 }
7523
7524 /* Fall through */
7525 case COMPARE:
7526 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
7527 {
7528 *total = 0;
7529 return true;
7530 }
7531
7532 *total += COSTS_N_INSNS (1);
7533 if (CONST_INT_P (XEXP (x, 1))
7534 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7535 {
7536 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7537 return true;
7538 }
7539
7540 subcode = GET_CODE (XEXP (x, 0));
7541 if (subcode == ASHIFT || subcode == ASHIFTRT
7542 || subcode == LSHIFTRT
7543 || subcode == ROTATE || subcode == ROTATERT)
7544 {
7545 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7546 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7547 return true;
7548 }
7549
7550 if (subcode == MULT
7551 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7552 {
7553 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7554 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7555 return true;
7556 }
7557
7558 return false;
7559
7560 case UMIN:
7561 case UMAX:
7562 case SMIN:
7563 case SMAX:
7564 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7565 if (!CONST_INT_P (XEXP (x, 1))
7566 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7567 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7568 return true;
7569
7570 case ABS:
7571 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7572 {
7573 if (TARGET_HARD_FLOAT
7574 && (mode == SFmode
7575 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7576 {
7577 *total = COSTS_N_INSNS (1);
7578 return false;
7579 }
7580 *total = COSTS_N_INSNS (20);
7581 return false;
7582 }
7583 *total = COSTS_N_INSNS (1);
7584 if (mode == DImode)
7585 *total += COSTS_N_INSNS (3);
7586 return false;
7587
7588 case SIGN_EXTEND:
7589 case ZERO_EXTEND:
7590 *total = 0;
7591 if (GET_MODE_CLASS (mode) == MODE_INT)
7592 {
7593 rtx op = XEXP (x, 0);
7594 enum machine_mode opmode = GET_MODE (op);
7595
7596 if (mode == DImode)
7597 *total += COSTS_N_INSNS (1);
7598
7599 if (opmode != SImode)
7600 {
7601 if (MEM_P (op))
7602 {
7603 /* If !arm_arch4, we use one of the extendhisi2_mem
7604 or movhi_bytes patterns for HImode. For a QImode
7605 sign extension, we first zero-extend from memory
7606 and then perform a shift sequence. */
7607 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7608 *total += COSTS_N_INSNS (2);
7609 }
7610 else if (arm_arch6)
7611 *total += COSTS_N_INSNS (1);
7612
7613 /* We don't have the necessary insn, so we need to perform some
7614 other operation. */
7615 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7616 /* An and with constant 255. */
7617 *total += COSTS_N_INSNS (1);
7618 else
7619 /* A shift sequence. Increase costs slightly to avoid
7620 combining two shifts into an extend operation. */
7621 *total += COSTS_N_INSNS (2) + 1;
7622 }
7623
7624 return false;
7625 }
7626
7627 switch (GET_MODE (XEXP (x, 0)))
7628 {
7629 case V8QImode:
7630 case V4HImode:
7631 case V2SImode:
7632 case V4QImode:
7633 case V2HImode:
7634 *total = COSTS_N_INSNS (1);
7635 return false;
7636
7637 default:
7638 gcc_unreachable ();
7639 }
7640 gcc_unreachable ();
7641
7642 case ZERO_EXTRACT:
7643 case SIGN_EXTRACT:
7644 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7645 return true;
7646
7647 case CONST_INT:
7648 if (const_ok_for_arm (INTVAL (x))
7649 || const_ok_for_arm (~INTVAL (x)))
7650 *total = COSTS_N_INSNS (1);
7651 else
7652 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7653 INTVAL (x), NULL_RTX,
7654 NULL_RTX, 0, 0));
7655 return true;
7656
7657 case CONST:
7658 case LABEL_REF:
7659 case SYMBOL_REF:
7660 *total = COSTS_N_INSNS (3);
7661 return true;
7662
7663 case HIGH:
7664 *total = COSTS_N_INSNS (1);
7665 return true;
7666
7667 case LO_SUM:
7668 *total = COSTS_N_INSNS (1);
7669 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7670 return true;
7671
7672 case CONST_DOUBLE:
7673 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7674 && (mode == SFmode || !TARGET_VFP_SINGLE))
7675 *total = COSTS_N_INSNS (1);
7676 else
7677 *total = COSTS_N_INSNS (4);
7678 return true;
7679
7680 case SET:
7681 /* The vec_extract patterns accept memory operands that require an
7682 address reload. Account for the cost of that reload to give the
7683 auto-inc-dec pass an incentive to try to replace them. */
7684 if (TARGET_NEON && MEM_P (SET_DEST (x))
7685 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
7686 {
7687 *total = rtx_cost (SET_DEST (x), code, 0, speed);
7688 if (!neon_vector_mem_operand (SET_DEST (x), 2))
7689 *total += COSTS_N_INSNS (1);
7690 return true;
7691 }
7692 /* Likewise for the vec_set patterns. */
7693 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
7694 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
7695 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
7696 {
7697 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
7698 *total = rtx_cost (mem, code, 0, speed);
7699 if (!neon_vector_mem_operand (mem, 2))
7700 *total += COSTS_N_INSNS (1);
7701 return true;
7702 }
7703 return false;
7704
7705 case UNSPEC:
7706 /* We cost this as high as our memory costs to allow this to
7707 be hoisted from loops. */
7708 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
7709 {
7710 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7711 }
7712 return true;
7713
7714 case CONST_VECTOR:
7715 if (TARGET_NEON
7716 && TARGET_HARD_FLOAT
7717 && outer == SET
7718 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
7719 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
7720 *total = COSTS_N_INSNS (1);
7721 else
7722 *total = COSTS_N_INSNS (4);
7723 return true;
7724
7725 default:
7726 *total = COSTS_N_INSNS (4);
7727 return false;
7728 }
7729 }
7730
7731 /* Estimates the size cost of thumb1 instructions.
7732 For now, most of the code is copied from thumb1_rtx_costs. We need more
7733 fine-grained tuning when we have more related test cases. */
7734 static inline int
7735 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7736 {
7737 enum machine_mode mode = GET_MODE (x);
7738
7739 switch (code)
7740 {
7741 case ASHIFT:
7742 case ASHIFTRT:
7743 case LSHIFTRT:
7744 case ROTATERT:
7745 case PLUS:
7746 case MINUS:
7747 case COMPARE:
7748 case NEG:
7749 case NOT:
7750 return COSTS_N_INSNS (1);
7751
7752 case MULT:
7753 if (CONST_INT_P (XEXP (x, 1)))
7754 {
7755 /* The Thumb-1 mul instruction can't operate on a constant; we must
7756 load it into a register first. */
7757 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7758 return COSTS_N_INSNS (1) + const_size;
7759 }
7760 return COSTS_N_INSNS (1);
7761
7762 case SET:
7763 return (COSTS_N_INSNS (1)
7764 + 4 * ((MEM_P (SET_SRC (x)))
7765 + MEM_P (SET_DEST (x))));
7766
7767 case CONST_INT:
7768 if (outer == SET)
7769 {
7770 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7771 return COSTS_N_INSNS (1);
7772 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7773 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7774 return COSTS_N_INSNS (2);
7775 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7776 if (thumb_shiftable_const (INTVAL (x)))
7777 return COSTS_N_INSNS (2);
7778 return COSTS_N_INSNS (3);
7779 }
7780 else if ((outer == PLUS || outer == COMPARE)
7781 && INTVAL (x) < 256 && INTVAL (x) > -256)
7782 return 0;
7783 else if ((outer == IOR || outer == XOR || outer == AND)
7784 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7785 return COSTS_N_INSNS (1);
7786 else if (outer == AND)
7787 {
7788 int i;
7789 /* This duplicates the tests in the andsi3 expander. */
7790 for (i = 9; i <= 31; i++)
7791 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7792 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7793 return COSTS_N_INSNS (2);
7794 }
7795 else if (outer == ASHIFT || outer == ASHIFTRT
7796 || outer == LSHIFTRT)
7797 return 0;
7798 return COSTS_N_INSNS (2);
7799
7800 case CONST:
7801 case CONST_DOUBLE:
7802 case LABEL_REF:
7803 case SYMBOL_REF:
7804 return COSTS_N_INSNS (3);
7805
7806 case UDIV:
7807 case UMOD:
7808 case DIV:
7809 case MOD:
7810 return 100;
7811
7812 case TRUNCATE:
7813 return 99;
7814
7815 case AND:
7816 case XOR:
7817 case IOR:
7818 /* XXX guess. */
7819 return 8;
7820
7821 case MEM:
7822 /* XXX another guess. */
7823 /* Memory costs quite a lot for the first word, but subsequent words
7824 load at the equivalent of a single insn each. */
7825 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7826 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7827 ? 4 : 0));
7828
7829 case IF_THEN_ELSE:
7830 /* XXX a guess. */
7831 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7832 return 14;
7833 return 2;
7834
7835 case ZERO_EXTEND:
7836 /* XXX still guessing. */
7837 switch (GET_MODE (XEXP (x, 0)))
7838 {
7839 case QImode:
7840 return (1 + (mode == DImode ? 4 : 0)
7841 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
7842
7843 case HImode:
7844 return (4 + (mode == DImode ? 4 : 0)
7845 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
7846
7847 case SImode:
7848 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
7849
7850 default:
7851 return 99;
7852 }
7853
7854 default:
7855 return 99;
7856 }
7857 }
7858
7859 /* RTX costs when optimizing for size. */
7860 static bool
7861 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7862 int *total)
7863 {
7864 enum machine_mode mode = GET_MODE (x);
7865 if (TARGET_THUMB1)
7866 {
7867 *total = thumb1_size_rtx_costs (x, code, outer_code);
7868 return true;
7869 }
7870
7871 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7872 switch (code)
7873 {
7874 case MEM:
7875 /* A memory access costs 1 insn if the mode is small, or the address is
7876 a single register, otherwise it costs one insn per word. */
7877 if (REG_P (XEXP (x, 0)))
7878 *total = COSTS_N_INSNS (1);
7879 else if (flag_pic
7880 && GET_CODE (XEXP (x, 0)) == PLUS
7881 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7882 /* This will be split into two instructions.
7883 See arm.md:calculate_pic_address. */
7884 *total = COSTS_N_INSNS (2);
7885 else
7886 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7887 return true;
7888
7889 case DIV:
7890 case MOD:
7891 case UDIV:
7892 case UMOD:
7893 /* Needs a libcall, so it costs about this. */
7894 *total = COSTS_N_INSNS (2);
7895 return false;
7896
7897 case ROTATE:
7898 if (mode == SImode && REG_P (XEXP (x, 1)))
7899 {
7900 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
7901 return true;
7902 }
7903 /* Fall through */
7904 case ROTATERT:
7905 case ASHIFT:
7906 case LSHIFTRT:
7907 case ASHIFTRT:
7908 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
7909 {
7910 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
7911 return true;
7912 }
7913 else if (mode == SImode)
7914 {
7915 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
7916 /* Slightly disparage register shifts, but not by much. */
7917 if (!CONST_INT_P (XEXP (x, 1)))
7918 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
7919 return true;
7920 }
7921
7922 /* Needs a libcall. */
7923 *total = COSTS_N_INSNS (2);
7924 return false;
7925
7926 case MINUS:
7927 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7928 && (mode == SFmode || !TARGET_VFP_SINGLE))
7929 {
7930 *total = COSTS_N_INSNS (1);
7931 return false;
7932 }
7933
7934 if (mode == SImode)
7935 {
7936 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7937 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7938
7939 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7940 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7941 || subcode1 == ROTATE || subcode1 == ROTATERT
7942 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7943 || subcode1 == ASHIFTRT)
7944 {
7945 /* It's just the cost of the two operands. */
7946 *total = 0;
7947 return false;
7948 }
7949
7950 *total = COSTS_N_INSNS (1);
7951 return false;
7952 }
7953
7954 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7955 return false;
7956
7957 case PLUS:
7958 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7959 && (mode == SFmode || !TARGET_VFP_SINGLE))
7960 {
7961 *total = COSTS_N_INSNS (1);
7962 return false;
7963 }
7964
7965 /* A shift as a part of ADD costs nothing. */
7966 if (GET_CODE (XEXP (x, 0)) == MULT
7967 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7968 {
7969 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7970 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
7971 *total += rtx_cost (XEXP (x, 1), code, 1, false);
7972 return true;
7973 }
7974
7975 /* Fall through */
7976 case AND: case XOR: case IOR:
7977 if (mode == SImode)
7978 {
7979 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7980
7981 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7982 || subcode == LSHIFTRT || subcode == ASHIFTRT
7983 || (code == AND && subcode == NOT))
7984 {
7985 /* It's just the cost of the two operands. */
7986 *total = 0;
7987 return false;
7988 }
7989 }
7990
7991 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7992 return false;
7993
7994 case MULT:
7995 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7996 return false;
7997
7998 case NEG:
7999 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8000 && (mode == SFmode || !TARGET_VFP_SINGLE))
8001 {
8002 *total = COSTS_N_INSNS (1);
8003 return false;
8004 }
8005
8006 /* Fall through */
8007 case NOT:
8008 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8009
8010 return false;
8011
8012 case IF_THEN_ELSE:
8013 *total = 0;
8014 return false;
8015
8016 case COMPARE:
8017 if (cc_register (XEXP (x, 0), VOIDmode))
8018 *total = 0;
8019 else
8020 *total = COSTS_N_INSNS (1);
8021 return false;
8022
8023 case ABS:
8024 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8025 && (mode == SFmode || !TARGET_VFP_SINGLE))
8026 *total = COSTS_N_INSNS (1);
8027 else
8028 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
8029 return false;
8030
8031 case SIGN_EXTEND:
8032 case ZERO_EXTEND:
8033 return arm_rtx_costs_1 (x, outer_code, total, 0);
8034
8035 case CONST_INT:
8036 if (const_ok_for_arm (INTVAL (x)))
8037 /* A multiplication by a constant requires another instruction
8038 to load the constant to a register. */
8039 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
8040 ? 1 : 0);
8041 else if (const_ok_for_arm (~INTVAL (x)))
8042 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
8043 else if (const_ok_for_arm (-INTVAL (x)))
8044 {
8045 if (outer_code == COMPARE || outer_code == PLUS
8046 || outer_code == MINUS)
8047 *total = 0;
8048 else
8049 *total = COSTS_N_INSNS (1);
8050 }
8051 else
8052 *total = COSTS_N_INSNS (2);
8053 return true;
8054
8055 case CONST:
8056 case LABEL_REF:
8057 case SYMBOL_REF:
8058 *total = COSTS_N_INSNS (2);
8059 return true;
8060
8061 case CONST_DOUBLE:
8062 *total = COSTS_N_INSNS (4);
8063 return true;
8064
8065 case CONST_VECTOR:
8066 if (TARGET_NEON
8067 && TARGET_HARD_FLOAT
8068 && outer_code == SET
8069 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8070 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8071 *total = COSTS_N_INSNS (1);
8072 else
8073 *total = COSTS_N_INSNS (4);
8074 return true;
8075
8076 case HIGH:
8077 case LO_SUM:
8078 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8079 cost of these slightly. */
8080 *total = COSTS_N_INSNS (1) + 1;
8081 return true;
8082
8083 case SET:
8084 return false;
8085
8086 default:
8087 if (mode != VOIDmode)
8088 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8089 else
8090 *total = COSTS_N_INSNS (4); /* Who knows? */
8091 return false;
8092 }
8093 }
8094
8095 /* RTX costs. Select the size or speed cost function depending on SPEED. */
8096 static bool
8097 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
8098 int *total, bool speed)
8099 {
8100 if (!speed)
8101 return arm_size_rtx_costs (x, (enum rtx_code) code,
8102 (enum rtx_code) outer_code, total);
8103 else
8104 return current_tune->rtx_costs (x, (enum rtx_code) code,
8105 (enum rtx_code) outer_code,
8106 total, speed);
8107 }
8108
8109 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
8110 supported on any "slowmul" cores, so it can be ignored. */
8111
8112 static bool
8113 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8114 int *total, bool speed)
8115 {
8116 enum machine_mode mode = GET_MODE (x);
8117
8118 if (TARGET_THUMB)
8119 {
8120 *total = thumb1_rtx_costs (x, code, outer_code);
8121 return true;
8122 }
8123
8124 switch (code)
8125 {
8126 case MULT:
8127 if (GET_MODE_CLASS (mode) == MODE_FLOAT
8128 || mode == DImode)
8129 {
8130 *total = COSTS_N_INSNS (20);
8131 return false;
8132 }
8133
8134 if (CONST_INT_P (XEXP (x, 1)))
8135 {
8136 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8137 & (unsigned HOST_WIDE_INT) 0xffffffff);
8138 int cost, const_ok = const_ok_for_arm (i);
8139 int j, booth_unit_size;
8140
8141 /* Tune as appropriate. */
8142 cost = const_ok ? 4 : 8;
8143 booth_unit_size = 2;
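/* Worked example: multiplying by 0x15 (binary 10101) makes the loop
   below run three times (10101 -> 101 -> 1 -> 0), so for a const_ok
   value the result is COSTS_N_INSNS (4 + 3).  */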
8144 for (j = 0; i && j < 32; j += booth_unit_size)
8145 {
8146 i >>= booth_unit_size;
8147 cost++;
8148 }
8149
8150 *total = COSTS_N_INSNS (cost);
8151 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8152 return true;
8153 }
8154
8155 *total = COSTS_N_INSNS (20);
8156 return false;
8157
8158 default:
8159 return arm_rtx_costs_1 (x, outer_code, total, speed);
8160 }
8161 }
8162
8163
8164 /* RTX cost for cores with a fast multiply unit (M variants). */
8165
8166 static bool
8167 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8168 int *total, bool speed)
8169 {
8170 enum machine_mode mode = GET_MODE (x);
8171
8172 if (TARGET_THUMB1)
8173 {
8174 *total = thumb1_rtx_costs (x, code, outer_code);
8175 return true;
8176 }
8177
8178 /* ??? should thumb2 use different costs? */
8179 switch (code)
8180 {
8181 case MULT:
8182 /* There is no point basing this on the tuning, since it is always the
8183 fast variant if it exists at all. */
8184 if (mode == DImode
8185 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8186 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8187 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8188 {
8189 *total = COSTS_N_INSNS (2);
8190 return false;
8191 }
8192
8193
8194 if (mode == DImode)
8195 {
8196 *total = COSTS_N_INSNS (5);
8197 return false;
8198 }
8199
8200 if (CONST_INT_P (XEXP (x, 1)))
8201 {
8202 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8203 & (unsigned HOST_WIDE_INT) 0xffffffff);
8204 int cost, const_ok = const_ok_for_arm (i);
8205 int j, booth_unit_size;
8206
8207 /* Tune as appropriate. */
8208 cost = const_ok ? 4 : 8;
8209 booth_unit_size = 8;
8210 for (j = 0; i && j < 32; j += booth_unit_size)
8211 {
8212 i >>= booth_unit_size;
8213 cost++;
8214 }
8215
8216 *total = COSTS_N_INSNS (cost);
8217 return false;
8218 }
8219
8220 if (mode == SImode)
8221 {
8222 *total = COSTS_N_INSNS (4);
8223 return false;
8224 }
8225
8226 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8227 {
8228 if (TARGET_HARD_FLOAT
8229 && (mode == SFmode
8230 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8231 {
8232 *total = COSTS_N_INSNS (1);
8233 return false;
8234 }
8235 }
8236
8237 /* Requires a lib call. */
8238 *total = COSTS_N_INSNS (20);
8239 return false;
8240
8241 default:
8242 return arm_rtx_costs_1 (x, outer_code, total, speed);
8243 }
8244 }
8245
8246
8247 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
8248 so it can be ignored. */
8249
8250 static bool
8251 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8252 int *total, bool speed)
8253 {
8254 enum machine_mode mode = GET_MODE (x);
8255
8256 if (TARGET_THUMB)
8257 {
8258 *total = thumb1_rtx_costs (x, code, outer_code);
8259 return true;
8260 }
8261
8262 switch (code)
8263 {
8264 case COMPARE:
8265 if (GET_CODE (XEXP (x, 0)) != MULT)
8266 return arm_rtx_costs_1 (x, outer_code, total, speed);
8267
8268 /* A COMPARE of a MULT is slow on XScale; the muls instruction
8269 will stall until the multiplication is complete. */
8270 *total = COSTS_N_INSNS (3);
8271 return false;
8272
8273 case MULT:
8274 /* There is no point basing this on the tuning, since it is always the
8275 fast variant if it exists at all. */
8276 if (mode == DImode
8277 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8278 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8279 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8280 {
8281 *total = COSTS_N_INSNS (2);
8282 return false;
8283 }
8284
8285
8286 if (mode == DImode)
8287 {
8288 *total = COSTS_N_INSNS (5);
8289 return false;
8290 }
8291
8292 if (CONST_INT_P (XEXP (x, 1)))
8293 {
8294 /* If operand 1 is a constant we can more accurately
8295 calculate the cost of the multiply. The multiplier can
8296 retire 15 bits on the first cycle and a further 12 on the
8297 second. We do, of course, have to load the constant into
8298 a register first. */
8299 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8300 /* There's a general overhead of one cycle. */
8301 int cost = 1;
8302 unsigned HOST_WIDE_INT masked_const;
8303
8304 if (i & 0x80000000)
8305 i = ~i;
8306
8307 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
8308
8309 masked_const = i & 0xffff8000;
8310 if (masked_const != 0)
8311 {
8312 cost++;
8313 masked_const = i & 0xf8000000;
8314 if (masked_const != 0)
8315 cost++;
8316 }
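/* Worked example: for 0x12345678 both masks above are nonzero, so the
   multiply is costed at COSTS_N_INSNS (3).  */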
8317 *total = COSTS_N_INSNS (cost);
8318 return false;
8319 }
8320
8321 if (mode == SImode)
8322 {
8323 *total = COSTS_N_INSNS (3);
8324 return false;
8325 }
8326
8327 /* Requires a lib call. */
8328 *total = COSTS_N_INSNS (20);
8329 return false;
8330
8331 default:
8332 return arm_rtx_costs_1 (x, outer_code, total, speed);
8333 }
8334 }
8335
8336
8337 /* RTX costs for 9e (and later) cores. */
8338
8339 static bool
8340 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8341 int *total, bool speed)
8342 {
8343 enum machine_mode mode = GET_MODE (x);
8344
8345 if (TARGET_THUMB1)
8346 {
8347 switch (code)
8348 {
8349 case MULT:
8350 *total = COSTS_N_INSNS (3);
8351 return true;
8352
8353 default:
8354 *total = thumb1_rtx_costs (x, code, outer_code);
8355 return true;
8356 }
8357 }
8358
8359 switch (code)
8360 {
8361 case MULT:
8362 /* There is no point basing this on the tuning, since it is always the
8363 fast variant if it exists at all. */
8364 if (mode == DImode
8365 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8366 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8367 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8368 {
8369 *total = COSTS_N_INSNS (2);
8370 return false;
8371 }
8372
8373
8374 if (mode == DImode)
8375 {
8376 *total = COSTS_N_INSNS (5);
8377 return false;
8378 }
8379
8380 if (mode == SImode)
8381 {
8382 *total = COSTS_N_INSNS (2);
8383 return false;
8384 }
8385
8386 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8387 {
8388 if (TARGET_HARD_FLOAT
8389 && (mode == SFmode
8390 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8391 {
8392 *total = COSTS_N_INSNS (1);
8393 return false;
8394 }
8395 }
8396
8397 *total = COSTS_N_INSNS (20);
8398 return false;
8399
8400 default:
8401 return arm_rtx_costs_1 (x, outer_code, total, speed);
8402 }
8403 }
8404 /* All address computations that can be done are free, but rtx cost returns
8405 the same for practically all of them. So we weight the different types
8406 of address here in the order (most preferred first):
8407 PRE/POST_INC/DEC, INT sum, SHIFT or NON-INT sum, REG, MEM or LABEL. */
8408 static inline int
8409 arm_arm_address_cost (rtx x)
8410 {
8411 enum rtx_code c = GET_CODE (x);
8412
8413 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
8414 return 0;
8415 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
8416 return 10;
8417
8418 if (c == PLUS)
8419 {
8420 if (CONST_INT_P (XEXP (x, 1)))
8421 return 2;
8422
8423 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
8424 return 3;
8425
8426 return 4;
8427 }
8428
8429 return 6;
8430 }
8431
8432 static inline int
8433 arm_thumb_address_cost (rtx x)
8434 {
8435 enum rtx_code c = GET_CODE (x);
8436
8437 if (c == REG)
8438 return 1;
8439 if (c == PLUS
8440 && REG_P (XEXP (x, 0))
8441 && CONST_INT_P (XEXP (x, 1)))
8442 return 1;
8443
8444 return 2;
8445 }
8446
8447 static int
8448 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
8449 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
8450 {
8451 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
8452 }
8453
8454 /* Adjust cost hook for XScale. */
8455 static bool
8456 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8457 {
8458 /* Some true dependencies can have a higher cost depending
8459 on precisely how certain input operands are used. */
8460 if (REG_NOTE_KIND (link) == 0
8461 && recog_memoized (insn) >= 0
8462 && recog_memoized (dep) >= 0)
8463 {
8464 int shift_opnum = get_attr_shift (insn);
8465 enum attr_type attr_type = get_attr_type (dep);
8466
8467 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8468 operand for INSN. If we have a shifted input operand and the
8469 instruction we depend on is another ALU instruction, then we may
8470 have to account for an additional stall. */
8471 if (shift_opnum != 0
8472 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
8473 {
8474 rtx shifted_operand;
8475 int opno;
8476
8477 /* Get the shifted operand. */
8478 extract_insn (insn);
8479 shifted_operand = recog_data.operand[shift_opnum];
8480
8481 /* Iterate over all the operands in DEP. If we write an operand
8482 that overlaps with SHIFTED_OPERAND, then we have to increase the
8483 cost of this dependency. */
8484 extract_insn (dep);
8485 preprocess_constraints ();
8486 for (opno = 0; opno < recog_data.n_operands; opno++)
8487 {
8488 /* We can ignore strict inputs. */
8489 if (recog_data.operand_type[opno] == OP_IN)
8490 continue;
8491
8492 if (reg_overlap_mentioned_p (recog_data.operand[opno],
8493 shifted_operand))
8494 {
8495 *cost = 2;
8496 return false;
8497 }
8498 }
8499 }
8500 }
8501 return true;
8502 }
8503
8504 /* Adjust cost hook for Cortex A9. */
8505 static bool
8506 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8507 {
8508 switch (REG_NOTE_KIND (link))
8509 {
8510 case REG_DEP_ANTI:
8511 *cost = 0;
8512 return false;
8513
8514 case REG_DEP_TRUE:
8515 case REG_DEP_OUTPUT:
8516 if (recog_memoized (insn) >= 0
8517 && recog_memoized (dep) >= 0)
8518 {
8519 if (GET_CODE (PATTERN (insn)) == SET)
8520 {
8521 if (GET_MODE_CLASS
8522 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8523 || GET_MODE_CLASS
8524 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8525 {
8526 enum attr_type attr_type_insn = get_attr_type (insn);
8527 enum attr_type attr_type_dep = get_attr_type (dep);
8528
8529 /* By default all dependencies of the form
8530 s0 = s0 <op> s1
8531 s0 = s0 <op> s2
8532 have an extra latency of 1 cycle because
8533 of the input and output dependency in this
8534 case. However, this gets modeled as a true
8535 dependency, hence all these checks. */
8536 if (REG_P (SET_DEST (PATTERN (insn)))
8537 && REG_P (SET_DEST (PATTERN (dep)))
8538 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8539 SET_DEST (PATTERN (dep))))
8540 {
8541 /* FMACS is a special case where the dependent
8542 instruction can be issued 3 cycles earlier than
8543 the normal latency in the case of an output
8544 dependency. */
8545 if ((attr_type_insn == TYPE_FMACS
8546 || attr_type_insn == TYPE_FMACD)
8547 && (attr_type_dep == TYPE_FMACS
8548 || attr_type_dep == TYPE_FMACD))
8549 {
8550 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8551 *cost = insn_default_latency (dep) - 3;
8552 else
8553 *cost = insn_default_latency (dep);
8554 return false;
8555 }
8556 else
8557 {
8558 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8559 *cost = insn_default_latency (dep) + 1;
8560 else
8561 *cost = insn_default_latency (dep);
8562 }
8563 return false;
8564 }
8565 }
8566 }
8567 }
8568 break;
8569
8570 default:
8571 gcc_unreachable ();
8572 }
8573
8574 return true;
8575 }
8576
8577 /* Adjust cost hook for FA726TE. */
8578 static bool
8579 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8580 {
8581 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
8582 has a penalty of 3. */
8583 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8584 && recog_memoized (insn) >= 0
8585 && recog_memoized (dep) >= 0
8586 && get_attr_conds (dep) == CONDS_SET)
8587 {
8588 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8589 if (get_attr_conds (insn) == CONDS_USE
8590 && get_attr_type (insn) != TYPE_BRANCH)
8591 {
8592 *cost = 3;
8593 return false;
8594 }
8595
8596 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8597 || get_attr_conds (insn) == CONDS_USE)
8598 {
8599 *cost = 0;
8600 return false;
8601 }
8602 }
8603
8604 return true;
8605 }
8606
8607 /* Implement TARGET_REGISTER_MOVE_COST.
8608
8609 Moves between VFP_REGS and GENERAL_REGS are a single insn, but one
8610 that is typically more expensive than a single memory access. We set
8611 the cost to less than two memory accesses so that floating
8612 point to integer conversion does not go through memory. */
8613
8614 int
8615 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
8616 reg_class_t from, reg_class_t to)
8617 {
8618 if (TARGET_32BIT)
8619 {
8620 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
8621 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
8622 return 15;
8623 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
8624 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
8625 return 4;
8626 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
8627 return 20;
8628 else
8629 return 2;
8630 }
8631 else
8632 {
8633 if (from == HI_REGS || to == HI_REGS)
8634 return 4;
8635 else
8636 return 2;
8637 }
8638 }
8639
8640 /* Implement TARGET_MEMORY_MOVE_COST. */
8641
8642 int
8643 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
8644 bool in ATTRIBUTE_UNUSED)
8645 {
8646 if (TARGET_32BIT)
8647 return 10;
8648 else
8649 {
8650 if (GET_MODE_SIZE (mode) < 4)
8651 return 8;
8652 else
8653 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
8654 }
8655 }
8656
8657 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8658 It corrects the value of COST based on the relationship between
8659 INSN and DEP through the dependence LINK. It returns the new
8660 value. There is a per-core adjust_cost hook to adjust scheduler costs
8661 and the per-core hook can choose to completely override the generic
8662 adjust_cost function. Only put bits of code into arm_adjust_cost that
8663 are common across all cores. */
8664 static int
8665 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8666 {
8667 rtx i_pat, d_pat;
8668
8669 /* When generating Thumb-1 code, we want to place flag-setting operations
8670 close to a conditional branch which depends on them, so that we can
8671 omit the comparison. */
8672 if (TARGET_THUMB1
8673 && REG_NOTE_KIND (link) == 0
8674 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8675 && recog_memoized (dep) >= 0
8676 && get_attr_conds (dep) == CONDS_SET)
8677 return 0;
8678
8679 if (current_tune->sched_adjust_cost != NULL)
8680 {
8681 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8682 return cost;
8683 }
8684
8685 /* XXX Is this strictly true? */
8686 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8687 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8688 return 0;
8689
8690 /* Call insns don't incur a stall, even if they follow a load. */
8691 if (REG_NOTE_KIND (link) == 0
8692 && CALL_P (insn))
8693 return 1;
8694
8695 if ((i_pat = single_set (insn)) != NULL
8696 && MEM_P (SET_SRC (i_pat))
8697 && (d_pat = single_set (dep)) != NULL
8698 && MEM_P (SET_DEST (d_pat)))
8699 {
8700 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8701 /* This is a load after a store; there is no conflict if the load reads
8702 from a cached area. Assume that loads from the stack and from the
8703 constant pool are cached, and that others will miss. This is a
8704 hack. */
8705
8706 if ((GET_CODE (src_mem) == SYMBOL_REF
8707 && CONSTANT_POOL_ADDRESS_P (src_mem))
8708 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8709 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8710 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8711 return 1;
8712 }
8713
8714 return cost;
8715 }
8716
8717 static int
8718 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
8719 {
8720 if (TARGET_32BIT)
8721 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
8722 else
8723 return (optimize > 0) ? 2 : 0;
8724 }
8725
8726 static int
8727 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
8728 {
8729 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
8730 }
8731
8732 static bool fp_consts_inited = false;
8733
8734 static REAL_VALUE_TYPE value_fp0;
8735
8736 static void
8737 init_fp_table (void)
8738 {
8739 REAL_VALUE_TYPE r;
8740
8741 r = REAL_VALUE_ATOF ("0", DFmode);
8742 value_fp0 = r;
8743 fp_consts_inited = true;
8744 }
8745
8746 /* Return TRUE if rtx X is a valid immediate FP constant. */
8747 int
8748 arm_const_double_rtx (rtx x)
8749 {
8750 REAL_VALUE_TYPE r;
8751
8752 if (!fp_consts_inited)
8753 init_fp_table ();
8754
8755 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8756 if (REAL_VALUE_MINUS_ZERO (r))
8757 return 0;
8758
8759 if (REAL_VALUES_EQUAL (r, value_fp0))
8760 return 1;
8761
8762 return 0;
8763 }
8764
8765 /* VFPv3 has a fairly wide range of representable immediates, formed from
8766 "quarter-precision" floating-point values. These can be evaluated using this
8767 formula (with ^ for exponentiation):
8768
8769 (-1)^s * n * 2^(-r)
8770
8771 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8772 16 <= n <= 31 and 0 <= r <= 7.
8773
8774 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8775
8776 - A (most-significant) is the sign bit.
8777 - BCD are the exponent (encoded as r XOR 3).
8778 - EFGH are the mantissa (encoded as n - 16).
8779 */
8780
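/* Illustrative sketch (not part of the compiler, and the function name
   is hypothetical): decode the 8-bit ABCDEFGH encoding described above
   back into its value, using plain C doubles purely for exposition.
   For instance, an encoding of 0x70 (r = 4, n = 16) decodes to 1.0.  */
static double
vfp3_quarter_precision_example (unsigned int abcdefgh)
{
  unsigned int sign = (abcdefgh >> 7) & 1;	/* A: sign bit.  */
  unsigned int r = ((abcdefgh >> 4) & 7) ^ 3;	/* BCD: exponent, XOR 3.  */
  unsigned int n = (abcdefgh & 0xf) + 16;	/* EFGH: n - 16, so add 16 back.  */

  /* (-1)^s * n * 2^(-r), with 16 <= n <= 31 and 0 <= r <= 7.  */
  return (sign ? -1.0 : 1.0) * (double) n / (double) (1u << r);
}
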
8781 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8782 fconst[sd] instruction, or -1 if X isn't suitable. */
8783 static int
8784 vfp3_const_double_index (rtx x)
8785 {
8786 REAL_VALUE_TYPE r, m;
8787 int sign, exponent;
8788 unsigned HOST_WIDE_INT mantissa, mant_hi;
8789 unsigned HOST_WIDE_INT mask;
8790 HOST_WIDE_INT m1, m2;
8791 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8792
8793 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
8794 return -1;
8795
8796 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8797
8798 /* We can't represent these things, so detect them first. */
8799 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8800 return -1;
8801
8802 /* Extract sign, exponent and mantissa. */
8803 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8804 r = real_value_abs (&r);
8805 exponent = REAL_EXP (&r);
8806 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8807 highest (sign) bit, with a fixed binary point at bit point_pos.
8808 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8809 bits for the mantissa, this may fail (low bits would be lost). */
8810 real_ldexp (&m, &r, point_pos - exponent);
8811 REAL_VALUE_TO_INT (&m1, &m2, m);
8812 mantissa = m1;
8813 mant_hi = m2;
8814
8815 /* If there are bits set in the low part of the mantissa, we can't
8816 represent this value. */
8817 if (mantissa != 0)
8818 return -1;
8819
8820 /* Now make it so that mantissa contains the most-significant bits, and move
8821 the point_pos to indicate that the least-significant bits have been
8822 discarded. */
8823 point_pos -= HOST_BITS_PER_WIDE_INT;
8824 mantissa = mant_hi;
8825
8826 /* We can permit four significant bits of mantissa only, plus a high bit
8827 which is always 1. */
8828 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8829 if ((mantissa & mask) != 0)
8830 return -1;
8831
8832 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8833 mantissa >>= point_pos - 5;
8834
8835 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8836 floating-point immediate zero with Neon using an integer-zero load, but
8837 that case is handled elsewhere.) */
8838 if (mantissa == 0)
8839 return -1;
8840
8841 gcc_assert (mantissa >= 16 && mantissa <= 31);
8842
8843 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8844 normalized significands are in the range [1, 2). (Our mantissa is shifted
8845 left 4 places at this point relative to normalized IEEE754 values). GCC
8846 internally uses [0.5, 1) (see real.c), so the exponent returned from
8847 REAL_EXP must be altered. */
8848 exponent = 5 - exponent;
8849
8850 if (exponent < 0 || exponent > 7)
8851 return -1;
8852
8853 /* Sign, mantissa and exponent are now in the correct form to plug into the
8854 formula described in the comment above. */
8855 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8856 }
8857
8858 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8859 int
8860 vfp3_const_double_rtx (rtx x)
8861 {
8862 if (!TARGET_VFP3)
8863 return 0;
8864
8865 return vfp3_const_double_index (x) != -1;
8866 }
8867
8868 /* Recognize immediates which can be used in various Neon instructions. Legal
8869 immediates are described by the following table (for VMVN variants, the
8870 bitwise inverse of the constant shown is recognized. In either case, VMOV
8871 is output and the correct instruction to use for a given constant is chosen
8872 by the assembler). The constant shown is replicated across all elements of
8873 the destination vector.
8874
8875 insn elems variant constant (binary)
8876 ---- ----- ------- -----------------
8877 vmov i32 0 00000000 00000000 00000000 abcdefgh
8878 vmov i32 1 00000000 00000000 abcdefgh 00000000
8879 vmov i32 2 00000000 abcdefgh 00000000 00000000
8880 vmov i32 3 abcdefgh 00000000 00000000 00000000
8881 vmov i16 4 00000000 abcdefgh
8882 vmov i16 5 abcdefgh 00000000
8883 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8884 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8885 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8886 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8887 vmvn i16 10 00000000 abcdefgh
8888 vmvn i16 11 abcdefgh 00000000
8889 vmov i32 12 00000000 00000000 abcdefgh 11111111
8890 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8891 vmov i32 14 00000000 abcdefgh 11111111 11111111
8892 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8893 vmov i8 16 abcdefgh
8894 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8895 eeeeeeee ffffffff gggggggg hhhhhhhh
8896 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8897 vmov f32 19 00000000 00000000 00000000 00000000
8898
8899 For case 18, B = !b. Representable values are exactly those accepted by
8900 vfp3_const_double_index, but are output as floating-point numbers rather
8901 than indices.
8902
8903 For case 19, we will change it to vmov.i32 when assembling.
8904
8905 Variants 0-5 (inclusive) may also be used as immediates for the second
8906 operand of VORR/VBIC instructions.
8907
8908 The INVERSE argument causes the bitwise inverse of the given operand to be
8909 recognized instead (used for recognizing legal immediates for the VAND/VORN
8910 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8911 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8912 output, rather than the real insns vbic/vorr).
8913
8914 INVERSE makes no difference to the recognition of float vectors.
8915
8916 The return value is the variant of immediate as shown in the above table, or
8917 -1 if the given value doesn't match any of the listed patterns.
8918 */
8919 static int
8920 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8921 rtx *modconst, int *elementwidth)
8922 {
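/* CHECK walks the byte-splatted constant built below with the given
   STRIDE; if every sampled byte satisfies TEST it records the variant
   CLASS and element size ELSIZE and breaks out of the matching
   do-while loop.  */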
8923 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8924 matches = 1; \
8925 for (i = 0; i < idx; i += (STRIDE)) \
8926 if (!(TEST)) \
8927 matches = 0; \
8928 if (matches) \
8929 { \
8930 immtype = (CLASS); \
8931 elsize = (ELSIZE); \
8932 break; \
8933 }
8934
8935 unsigned int i, elsize = 0, idx = 0, n_elts;
8936 unsigned int innersize;
8937 unsigned char bytes[16];
8938 int immtype = -1, matches;
8939 unsigned int invmask = inverse ? 0xff : 0;
8940 bool vector = GET_CODE (op) == CONST_VECTOR;
8941
8942 if (vector)
8943 {
8944 n_elts = CONST_VECTOR_NUNITS (op);
8945 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8946 }
8947 else
8948 {
8949 n_elts = 1;
8950 if (mode == VOIDmode)
8951 mode = DImode;
8952 innersize = GET_MODE_SIZE (mode);
8953 }
8954
8955 /* Vectors of float constants. */
8956 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8957 {
8958 rtx el0 = CONST_VECTOR_ELT (op, 0);
8959 REAL_VALUE_TYPE r0;
8960
8961 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
8962 return -1;
8963
8964 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8965
8966 for (i = 1; i < n_elts; i++)
8967 {
8968 rtx elt = CONST_VECTOR_ELT (op, i);
8969 REAL_VALUE_TYPE re;
8970
8971 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8972
8973 if (!REAL_VALUES_EQUAL (r0, re))
8974 return -1;
8975 }
8976
8977 if (modconst)
8978 *modconst = CONST_VECTOR_ELT (op, 0);
8979
8980 if (elementwidth)
8981 *elementwidth = 0;
8982
8983 if (el0 == CONST0_RTX (GET_MODE (el0)))
8984 return 19;
8985 else
8986 return 18;
8987 }
8988
8989 /* Splat vector constant out into a byte vector. */
8990 for (i = 0; i < n_elts; i++)
8991 {
8992 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
8993 unsigned HOST_WIDE_INT elpart;
8994 unsigned int part, parts;
8995
8996 if (CONST_INT_P (el))
8997 {
8998 elpart = INTVAL (el);
8999 parts = 1;
9000 }
9001 else if (CONST_DOUBLE_P (el))
9002 {
9003 elpart = CONST_DOUBLE_LOW (el);
9004 parts = 2;
9005 }
9006 else
9007 gcc_unreachable ();
9008
9009 for (part = 0; part < parts; part++)
9010 {
9011 unsigned int byte;
9012 for (byte = 0; byte < innersize; byte++)
9013 {
9014 bytes[idx++] = (elpart & 0xff) ^ invmask;
9015 elpart >>= BITS_PER_UNIT;
9016 }
9017 if (CONST_DOUBLE_P (el))
9018 elpart = CONST_DOUBLE_HIGH (el);
9019 }
9020 }
9021
9022 /* Sanity check. */
9023 gcc_assert (idx == GET_MODE_SIZE (mode));
9024
9025 do
9026 {
9027 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
9028 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9029
9030 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9031 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9032
9033 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
9034 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9035
9036 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
9037 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
9038
9039 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
9040
9041 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
9042
9043 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
9044 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9045
9046 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9047 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9048
9049 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
9050 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9051
9052 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
9053 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
9054
9055 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
9056
9057 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
9058
9059 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9060 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9061
9062 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9063 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9064
9065 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
9066 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9067
9068 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
9069 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9070
9071 CHECK (1, 8, 16, bytes[i] == bytes[0]);
9072
9073 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
9074 && bytes[i] == bytes[(i + 8) % idx]);
9075 }
9076 while (0);
9077
9078 if (immtype == -1)
9079 return -1;
9080
9081 if (elementwidth)
9082 *elementwidth = elsize;
9083
9084 if (modconst)
9085 {
9086 unsigned HOST_WIDE_INT imm = 0;
9087
9088 /* Un-invert bytes of recognized vector, if necessary. */
9089 if (invmask != 0)
9090 for (i = 0; i < idx; i++)
9091 bytes[i] ^= invmask;
9092
9093 if (immtype == 17)
9094 {
9095 /* FIXME: Broken on 32-bit H_W_I hosts. */
9096 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
9097
9098 for (i = 0; i < 8; i++)
9099 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
9100 << (i * BITS_PER_UNIT);
9101
9102 *modconst = GEN_INT (imm);
9103 }
9104 else
9105 {
9106 unsigned HOST_WIDE_INT imm = 0;
9107
9108 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
9109 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
9110
9111 *modconst = GEN_INT (imm);
9112 }
9113 }
9114
9115 return immtype;
9116 #undef CHECK
9117 }
9118
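/* Worked example (illustrative only, not used by the code): a V4SImode
   CONST_VECTOR whose four elements are all 0x4b splats into the byte
   vector 4b 00 00 00 repeated four times, which satisfies the
   CHECK (4, 32, 0, ...) clause above; the function then returns immtype 0
   with *ELEMENTWIDTH = 32 and *MODCONST = 0x4b, i.e. the constant can be
   emitted as a single "vmov.i32" immediate.  If instead every byte of the
   constant is identical (say each element is 0x7f7f7f7f), only the
   CHECK (1, 8, 16, ...) clause matches and the constant is emitted with
   "vmov.i8".  */
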
9119 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
9120 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
9121 float elements), and a modified constant (whatever should be output for a
9122 VMOV) in *MODCONST. */
9123
9124 int
9125 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
9126 rtx *modconst, int *elementwidth)
9127 {
9128 rtx tmpconst;
9129 int tmpwidth;
9130 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
9131
9132 if (retval == -1)
9133 return 0;
9134
9135 if (modconst)
9136 *modconst = tmpconst;
9137
9138 if (elementwidth)
9139 *elementwidth = tmpwidth;
9140
9141 return 1;
9142 }
9143
9144 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
9145 the immediate is valid, write a constant suitable for using as an operand
9146 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
9147 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
9148
9149 int
9150 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
9151 rtx *modconst, int *elementwidth)
9152 {
9153 rtx tmpconst;
9154 int tmpwidth;
9155 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
9156
9157 if (retval < 0 || retval > 5)
9158 return 0;
9159
9160 if (modconst)
9161 *modconst = tmpconst;
9162
9163 if (elementwidth)
9164 *elementwidth = tmpwidth;
9165
9166 return 1;
9167 }
9168
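/* For example (illustrative only): a V4SImode vector whose elements are
   all 0x00ff0000 splats to 00 00 ff 00 per element and is recognised as
   immtype 2, which falls inside the accepted 0..5 range, so it can be used
   directly as a "vorr.i32"/"vbic.i32" immediate.  A constant that is only
   representable as a byte splat (immtype 16) is rejected here, since the
   immediate forms of the logic instructions only cover 16-bit and 32-bit
   elements with a single non-zero byte.  */
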
9169 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
9170 the immediate is valid, write a constant suitable for using as an operand
9171 to VSHR/VSHL to *MODCONST and the corresponding element width to
9172 *ELEMENTWIDTH. ISLEFTSHIFT is true for a left shift and false for a
9173 right shift; the two have different immediate ranges. */
9174
9175 int
9176 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
9177 rtx *modconst, int *elementwidth,
9178 bool isleftshift)
9179 {
9180 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9181 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
9182 unsigned HOST_WIDE_INT last_elt = 0;
9183 unsigned HOST_WIDE_INT maxshift;
9184
9185 /* Split vector constant out into a byte vector. */
9186 for (i = 0; i < n_elts; i++)
9187 {
9188 rtx el = CONST_VECTOR_ELT (op, i);
9189 unsigned HOST_WIDE_INT elpart;
9190
9191 if (CONST_INT_P (el))
9192 elpart = INTVAL (el);
9193 else if (CONST_DOUBLE_P (el))
9194 return 0;
9195 else
9196 gcc_unreachable ();
9197
9198 if (i != 0 && elpart != last_elt)
9199 return 0;
9200
9201 last_elt = elpart;
9202 }
9203
9204 /* Shift less than element size. */
9205 maxshift = innersize * 8;
9206
9207 if (isleftshift)
9208 {
9209 /* Left shift immediate value can be from 0 to <size>-1. */
9210 if (last_elt >= maxshift)
9211 return 0;
9212 }
9213 else
9214 {
9215 /* Right shift immediate value can be from 1 to <size>. */
9216 if (last_elt == 0 || last_elt > maxshift)
9217 return 0;
9218 }
9219
9220 if (elementwidth)
9221 *elementwidth = innersize * 8;
9222
9223 if (modconst)
9224 *modconst = CONST_VECTOR_ELT (op, 0);
9225
9226 return 1;
9227 }
9228
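/* For example (illustrative only): with a V8QImode operand the element
   size is 8 bits, so a constant shift vector of {3, 3, 3, 3, 3, 3, 3, 3}
   is accepted in either direction, {0, ...} is accepted only as a left
   shift, and {8, ...} only as a right shift -- matching the architectural
   ranges of 0..7 for an immediate VSHL and 1..8 for an immediate VSHR on
   byte elements.  */
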
9229 /* Return a string suitable for output of Neon immediate logic operation
9230 MNEM. */
9231
9232 char *
9233 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
9234 int inverse, int quad)
9235 {
9236 int width, is_valid;
9237 static char templ[40];
9238
9239 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
9240
9241 gcc_assert (is_valid != 0);
9242
9243 if (quad)
9244 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
9245 else
9246 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
9247
9248 return templ;
9249 }
9250
9251 /* Return a string suitable for output of Neon immediate shift operation
9252 (VSHR or VSHL) MNEM. */
9253
9254 char *
9255 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
9256 enum machine_mode mode, int quad,
9257 bool isleftshift)
9258 {
9259 int width, is_valid;
9260 static char templ[40];
9261
9262 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
9263 gcc_assert (is_valid != 0);
9264
9265 if (quad)
9266 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
9267 else
9268 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
9269
9270 return templ;
9271 }
9272
9273 /* Output a sequence of pairwise operations to implement a reduction.
9274 NOTE: We do "too much work" here, because pairwise operations work on two
9275 registers-worth of operands in one go. Unfortunately, I don't think we can
9276 exploit those extra calculations to do the full operation in fewer steps.
9277 Although all vector elements of the result but the first are ignored, we
9278 actually calculate the same result in each of the elements. An alternative
9279 such as initially loading a vector with zero to use as each of the second
9280 operands would use up an additional register and take an extra instruction,
9281 for no particular gain. */
9282
9283 void
9284 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
9285 rtx (*reduc) (rtx, rtx, rtx))
9286 {
9287 enum machine_mode inner = GET_MODE_INNER (mode);
9288 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
9289 rtx tmpsum = op1;
9290
9291 for (i = parts / 2; i >= 1; i /= 2)
9292 {
9293 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
9294 emit_insn (reduc (dest, tmpsum, tmpsum));
9295 tmpsum = dest;
9296 }
9297 }
9298
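/* For instance (illustrative only): reducing a V4SImode value emits two
   pairwise operations -- the first folds the four elements into two
   partial sums, the second folds those into the final sum, which, as
   noted above, ends up replicated across the elements of the result.  */
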
9299 /* If VALS is a vector constant that can be loaded into a register
9300 using VDUP, generate instructions to do so and return an RTX to
9301 assign to the register. Otherwise return NULL_RTX. */
9302
9303 static rtx
9304 neon_vdup_constant (rtx vals)
9305 {
9306 enum machine_mode mode = GET_MODE (vals);
9307 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9308 int n_elts = GET_MODE_NUNITS (mode);
9309 bool all_same = true;
9310 rtx x;
9311 int i;
9312
9313 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
9314 return NULL_RTX;
9315
9316 for (i = 0; i < n_elts; ++i)
9317 {
9318 x = XVECEXP (vals, 0, i);
9319 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9320 all_same = false;
9321 }
9322
9323 if (!all_same)
9324 /* The elements are not all the same. We could handle repeating
9325 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
9326 {0, C, 0, C, 0, C, 0, C} which can be loaded using
9327 vdup.i16). */
9328 return NULL_RTX;
9329
9330 /* We can load this constant by using VDUP and a constant in a
9331 single ARM register. This will be cheaper than a vector
9332 load. */
9333
9334 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9335 return gen_rtx_VEC_DUPLICATE (mode, x);
9336 }
9337
9338 /* Generate code to load VALS, which is a PARALLEL containing only
9339 constants (for vec_init) or CONST_VECTOR, efficiently into a
9340 register. Returns an RTX to copy into the register, or NULL_RTX
9341 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
9342
9343 rtx
9344 neon_make_constant (rtx vals)
9345 {
9346 enum machine_mode mode = GET_MODE (vals);
9347 rtx target;
9348 rtx const_vec = NULL_RTX;
9349 int n_elts = GET_MODE_NUNITS (mode);
9350 int n_const = 0;
9351 int i;
9352
9353 if (GET_CODE (vals) == CONST_VECTOR)
9354 const_vec = vals;
9355 else if (GET_CODE (vals) == PARALLEL)
9356 {
9357 /* A CONST_VECTOR must contain only CONST_INTs and
9358 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9359 Only store valid constants in a CONST_VECTOR. */
9360 for (i = 0; i < n_elts; ++i)
9361 {
9362 rtx x = XVECEXP (vals, 0, i);
9363 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
9364 n_const++;
9365 }
9366 if (n_const == n_elts)
9367 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9368 }
9369 else
9370 gcc_unreachable ();
9371
9372 if (const_vec != NULL
9373 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9374 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9375 return const_vec;
9376 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9377 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9378 pipeline cycle; creating the constant takes one or two ARM
9379 pipeline cycles. */
9380 return target;
9381 else if (const_vec != NULL_RTX)
9382 /* Load from constant pool. On Cortex-A8 this takes two cycles
9383 (for either double or quad vectors). We cannot take advantage
9384 of single-cycle VLD1 because we need a PC-relative addressing
9385 mode. */
9386 return const_vec;
9387 else
9388 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9389 We cannot construct an initializer. */
9390 return NULL_RTX;
9391 }
9392
9393 /* Initialize vector TARGET to VALS. */
9394
9395 void
9396 neon_expand_vector_init (rtx target, rtx vals)
9397 {
9398 enum machine_mode mode = GET_MODE (target);
9399 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9400 int n_elts = GET_MODE_NUNITS (mode);
9401 int n_var = 0, one_var = -1;
9402 bool all_same = true;
9403 rtx x, mem;
9404 int i;
9405
9406 for (i = 0; i < n_elts; ++i)
9407 {
9408 x = XVECEXP (vals, 0, i);
9409 if (!CONSTANT_P (x))
9410 ++n_var, one_var = i;
9411
9412 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9413 all_same = false;
9414 }
9415
9416 if (n_var == 0)
9417 {
9418 rtx constant = neon_make_constant (vals);
9419 if (constant != NULL_RTX)
9420 {
9421 emit_move_insn (target, constant);
9422 return;
9423 }
9424 }
9425
9426 /* Splat a single non-constant element if we can. */
9427 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9428 {
9429 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9430 emit_insn (gen_rtx_SET (VOIDmode, target,
9431 gen_rtx_VEC_DUPLICATE (mode, x)));
9432 return;
9433 }
9434
9435 /* One field is non-constant. Load constant then overwrite varying
9436 field. This is more efficient than using the stack. */
9437 if (n_var == 1)
9438 {
9439 rtx copy = copy_rtx (vals);
9440 rtx index = GEN_INT (one_var);
9441
9442 /* Load constant part of vector, substitute neighboring value for
9443 varying element. */
9444 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9445 neon_expand_vector_init (target, copy);
9446
9447 /* Insert variable. */
9448 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9449 switch (mode)
9450 {
9451 case V8QImode:
9452 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9453 break;
9454 case V16QImode:
9455 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9456 break;
9457 case V4HImode:
9458 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9459 break;
9460 case V8HImode:
9461 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9462 break;
9463 case V2SImode:
9464 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9465 break;
9466 case V4SImode:
9467 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9468 break;
9469 case V2SFmode:
9470 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9471 break;
9472 case V4SFmode:
9473 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9474 break;
9475 case V2DImode:
9476 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9477 break;
9478 default:
9479 gcc_unreachable ();
9480 }
9481 return;
9482 }
9483
9484 /* Construct the vector in memory one field at a time
9485 and load the whole vector. */
9486 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
9487 for (i = 0; i < n_elts; i++)
9488 emit_move_insn (adjust_address_nv (mem, inner_mode,
9489 i * GET_MODE_SIZE (inner_mode)),
9490 XVECEXP (vals, 0, i));
9491 emit_move_insn (target, mem);
9492 }
9493
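/* Illustrative summary of the strategies above (with x, y, z, w denoting
   non-constant values):
   { 1, 1, 1, 1 } -> a single VMOV/VDUP immediate via neon_make_constant;
   { x, x, x, x } -> copy x into a core register, then VDUP;
   { 1, 2, x, 4 } -> initialize with { 1, 2, 4, 4 } (the varying slot
     borrowed from its neighbour), then one vset_lane insn to insert x;
   { x, y, z, w } -> store the elements to a stack temporary and load the
     whole vector back.  */
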
9494 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
9495 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9496 reported source locations are bogus. */
9497
9498 static void
9499 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9500 const char *err)
9501 {
9502 HOST_WIDE_INT lane;
9503
9504 gcc_assert (CONST_INT_P (operand));
9505
9506 lane = INTVAL (operand);
9507
9508 if (lane < low || lane >= high)
9509 error (err);
9510 }
9511
9512 /* Bounds-check lanes. */
9513
9514 void
9515 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9516 {
9517 bounds_check (operand, low, high, "lane out of range");
9518 }
9519
9520 /* Bounds-check constants. */
9521
9522 void
9523 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9524 {
9525 bounds_check (operand, low, high, "constant out of range");
9526 }
9527
9528 HOST_WIDE_INT
9529 neon_element_bits (enum machine_mode mode)
9530 {
9531 if (mode == DImode)
9532 return GET_MODE_BITSIZE (mode);
9533 else
9534 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
9535 }
9536
9537 \f
9538 /* Predicates for `match_operand' and `match_operator'. */
9539
9540 /* Return TRUE if OP is a valid coprocessor memory address pattern.
9541 WB is true if full writeback address modes are allowed and is false
9542 if limited writeback address modes (POST_INC and PRE_DEC) are
9543 allowed. */
9544
9545 int
9546 arm_coproc_mem_operand (rtx op, bool wb)
9547 {
9548 rtx ind;
9549
9550 /* Reject eliminable registers. */
9551 if (! (reload_in_progress || reload_completed)
9552 && ( reg_mentioned_p (frame_pointer_rtx, op)
9553 || reg_mentioned_p (arg_pointer_rtx, op)
9554 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9555 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9556 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9557 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9558 return FALSE;
9559
9560 /* Constants are converted into offsets from labels. */
9561 if (!MEM_P (op))
9562 return FALSE;
9563
9564 ind = XEXP (op, 0);
9565
9566 if (reload_completed
9567 && (GET_CODE (ind) == LABEL_REF
9568 || (GET_CODE (ind) == CONST
9569 && GET_CODE (XEXP (ind, 0)) == PLUS
9570 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9571 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
9572 return TRUE;
9573
9574 /* Match: (mem (reg)). */
9575 if (REG_P (ind))
9576 return arm_address_register_rtx_p (ind, 0);
9577
9578 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
9579 acceptable in any case (subject to verification by
9580 arm_address_register_rtx_p). We need WB to be true to accept
9581 PRE_INC and POST_DEC. */
9582 if (GET_CODE (ind) == POST_INC
9583 || GET_CODE (ind) == PRE_DEC
9584 || (wb
9585 && (GET_CODE (ind) == PRE_INC
9586 || GET_CODE (ind) == POST_DEC)))
9587 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9588
9589 if (wb
9590 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
9591 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
9592 && GET_CODE (XEXP (ind, 1)) == PLUS
9593 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
9594 ind = XEXP (ind, 1);
9595
9596 /* Match:
9597 (plus (reg)
9598 (const)). */
9599 if (GET_CODE (ind) == PLUS
9600 && REG_P (XEXP (ind, 0))
9601 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9602 && CONST_INT_P (XEXP (ind, 1))
9603 && INTVAL (XEXP (ind, 1)) > -1024
9604 && INTVAL (XEXP (ind, 1)) < 1024
9605 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9606 return TRUE;
9607
9608 return FALSE;
9609 }
9610
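/* Examples (illustrative only) of addresses accepted above, once past the
   eliminable-register check: (mem (reg rn)); post-increment and
   pre-decrement forms; and (mem (plus (reg rn) (const_int off))) with OFF
   a multiple of 4 in the range -1020..1020, i.e. the offsets a
   coprocessor load/store such as VLDR/VSTR can encode.  PRE_INC, POST_DEC
   and the PRE/POST_MODIFY forms are accepted only when WB is true.  */
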
9611 /* Return TRUE if OP is a memory operand which we can load or store a vector
9612 to/from. TYPE is one of the following values:
9613 0 - Vector load/store (vldr)
9614 1 - Core registers (ldm)
9615 2 - Element/structure loads (vld1)
9616 */
9617 int
9618 neon_vector_mem_operand (rtx op, int type)
9619 {
9620 rtx ind;
9621
9622 /* Reject eliminable registers. */
9623 if (! (reload_in_progress || reload_completed)
9624 && ( reg_mentioned_p (frame_pointer_rtx, op)
9625 || reg_mentioned_p (arg_pointer_rtx, op)
9626 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9627 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9628 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9629 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9630 return FALSE;
9631
9632 /* Constants are converted into offsets from labels. */
9633 if (!MEM_P (op))
9634 return FALSE;
9635
9636 ind = XEXP (op, 0);
9637
9638 if (reload_completed
9639 && (GET_CODE (ind) == LABEL_REF
9640 || (GET_CODE (ind) == CONST
9641 && GET_CODE (XEXP (ind, 0)) == PLUS
9642 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9643 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
9644 return TRUE;
9645
9646 /* Match: (mem (reg)). */
9647 if (REG_P (ind))
9648 return arm_address_register_rtx_p (ind, 0);
9649
9650 /* Allow post-increment with Neon registers. */
9651 if ((type != 1 && GET_CODE (ind) == POST_INC)
9652 || (type == 0 && GET_CODE (ind) == PRE_DEC))
9653 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9654
9655 /* FIXME: vld1 allows register post-modify. */
9656
9657 /* Match:
9658 (plus (reg)
9659 (const)). */
9660 if (type == 0
9661 && GET_CODE (ind) == PLUS
9662 && REG_P (XEXP (ind, 0))
9663 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9664 && CONST_INT_P (XEXP (ind, 1))
9665 && INTVAL (XEXP (ind, 1)) > -1024
9666 /* For quad modes, we restrict the constant offset to be slightly less
9667 than what the instruction format permits. We have no such constraint
9668 on double mode offsets. (This must match arm_legitimate_index_p.) */
9669 && (INTVAL (XEXP (ind, 1))
9670 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
9671 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9672 return TRUE;
9673
9674 return FALSE;
9675 }
9676
9677 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
9678 type. */
9679 int
9680 neon_struct_mem_operand (rtx op)
9681 {
9682 rtx ind;
9683
9684 /* Reject eliminable registers. */
9685 if (! (reload_in_progress || reload_completed)
9686 && ( reg_mentioned_p (frame_pointer_rtx, op)
9687 || reg_mentioned_p (arg_pointer_rtx, op)
9688 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9689 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9690 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9691 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9692 return FALSE;
9693
9694 /* Constants are converted into offsets from labels. */
9695 if (!MEM_P (op))
9696 return FALSE;
9697
9698 ind = XEXP (op, 0);
9699
9700 if (reload_completed
9701 && (GET_CODE (ind) == LABEL_REF
9702 || (GET_CODE (ind) == CONST
9703 && GET_CODE (XEXP (ind, 0)) == PLUS
9704 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9705 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
9706 return TRUE;
9707
9708 /* Match: (mem (reg)). */
9709 if (REG_P (ind))
9710 return arm_address_register_rtx_p (ind, 0);
9711
9712 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
9713 if (GET_CODE (ind) == POST_INC
9714 || GET_CODE (ind) == PRE_DEC)
9715 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9716
9717 return FALSE;
9718 }
9719
9720 /* Return true if X is a register that will be eliminated later on. */
9721 int
9722 arm_eliminable_register (rtx x)
9723 {
9724 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9725 || REGNO (x) == ARG_POINTER_REGNUM
9726 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9727 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9728 }
9729
9730 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
9731 coprocessor registers; otherwise return NO_REGS. */
9732
9733 enum reg_class
9734 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9735 {
9736 if (mode == HFmode)
9737 {
9738 if (!TARGET_NEON_FP16)
9739 return GENERAL_REGS;
9740 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9741 return NO_REGS;
9742 return GENERAL_REGS;
9743 }
9744
9745 /* The neon move patterns handle all legitimate vector and struct
9746 addresses. */
9747 if (TARGET_NEON
9748 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
9749 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9750 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9751 || VALID_NEON_STRUCT_MODE (mode)))
9752 return NO_REGS;
9753
9754 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9755 return NO_REGS;
9756
9757 return GENERAL_REGS;
9758 }
9759
9760 /* Values which must be returned in the most-significant end of the return
9761 register. */
9762
9763 static bool
9764 arm_return_in_msb (const_tree valtype)
9765 {
9766 return (TARGET_AAPCS_BASED
9767 && BYTES_BIG_ENDIAN
9768 && (AGGREGATE_TYPE_P (valtype)
9769 || TREE_CODE (valtype) == COMPLEX_TYPE
9770 || FIXED_POINT_TYPE_P (valtype)));
9771 }
9772
9773 /* Return TRUE if X references a SYMBOL_REF. */
9774 int
9775 symbol_mentioned_p (rtx x)
9776 {
9777 const char * fmt;
9778 int i;
9779
9780 if (GET_CODE (x) == SYMBOL_REF)
9781 return 1;
9782
9783 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9784 are constant offsets, not symbols. */
9785 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9786 return 0;
9787
9788 fmt = GET_RTX_FORMAT (GET_CODE (x));
9789
9790 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9791 {
9792 if (fmt[i] == 'E')
9793 {
9794 int j;
9795
9796 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9797 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9798 return 1;
9799 }
9800 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9801 return 1;
9802 }
9803
9804 return 0;
9805 }
9806
9807 /* Return TRUE if X references a LABEL_REF. */
9808 int
9809 label_mentioned_p (rtx x)
9810 {
9811 const char * fmt;
9812 int i;
9813
9814 if (GET_CODE (x) == LABEL_REF)
9815 return 1;
9816
9817 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9818 instruction, but they are constant offsets, not symbols. */
9819 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9820 return 0;
9821
9822 fmt = GET_RTX_FORMAT (GET_CODE (x));
9823 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9824 {
9825 if (fmt[i] == 'E')
9826 {
9827 int j;
9828
9829 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9830 if (label_mentioned_p (XVECEXP (x, i, j)))
9831 return 1;
9832 }
9833 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9834 return 1;
9835 }
9836
9837 return 0;
9838 }
9839
9840 int
9841 tls_mentioned_p (rtx x)
9842 {
9843 switch (GET_CODE (x))
9844 {
9845 case CONST:
9846 return tls_mentioned_p (XEXP (x, 0));
9847
9848 case UNSPEC:
9849 if (XINT (x, 1) == UNSPEC_TLS)
9850 return 1;
9851
9852 default:
9853 return 0;
9854 }
9855 }
9856
9857 /* Must not copy any rtx that uses a pc-relative address. */
9858
9859 static int
9860 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9861 {
9862 if (GET_CODE (*x) == UNSPEC
9863 && (XINT (*x, 1) == UNSPEC_PIC_BASE
9864 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
9865 return 1;
9866 return 0;
9867 }
9868
9869 static bool
9870 arm_cannot_copy_insn_p (rtx insn)
9871 {
9872 /* The tls call insn cannot be copied, as it is paired with a data
9873 word. */
9874 if (recog_memoized (insn) == CODE_FOR_tlscall)
9875 return true;
9876
9877 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9878 }
9879
9880 enum rtx_code
9881 minmax_code (rtx x)
9882 {
9883 enum rtx_code code = GET_CODE (x);
9884
9885 switch (code)
9886 {
9887 case SMAX:
9888 return GE;
9889 case SMIN:
9890 return LE;
9891 case UMIN:
9892 return LEU;
9893 case UMAX:
9894 return GEU;
9895 default:
9896 gcc_unreachable ();
9897 }
9898 }
9899
9900 /* Match pair of min/max operators that can be implemented via usat/ssat. */
9901
9902 bool
9903 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
9904 int *mask, bool *signed_sat)
9905 {
9906 /* The high bound must be a power of two minus one. */
9907 int log = exact_log2 (INTVAL (hi_bound) + 1);
9908 if (log == -1)
9909 return false;
9910
9911 /* The low bound is either zero (for usat) or one less than the
9912 negation of the high bound (for ssat). */
9913 if (INTVAL (lo_bound) == 0)
9914 {
9915 if (mask)
9916 *mask = log;
9917 if (signed_sat)
9918 *signed_sat = false;
9919
9920 return true;
9921 }
9922
9923 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
9924 {
9925 if (mask)
9926 *mask = log + 1;
9927 if (signed_sat)
9928 *signed_sat = true;
9929
9930 return true;
9931 }
9932
9933 return false;
9934 }
9935
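/* A minimal usage sketch (hypothetical illustration, kept under "#if 0" so
   it does not affect the build; the function name below is invented for
   the example).  It shows how a clamp to 0..255 maps onto usat and a clamp
   to -128..127 onto ssat.  */
#if 0
static void
arm_sat_operator_match_example (void)
{
  int mask;
  bool is_signed;

  /* Bounds (0, 255): exact_log2 (256) == 8 and the low bound is zero,
     so this becomes "usat ..., #8, ...".  */
  if (arm_sat_operator_match (GEN_INT (0), GEN_INT (255), &mask, &is_signed))
    gcc_assert (mask == 8 && !is_signed);

  /* Bounds (-128, 127): low == -high - 1, so the signed form applies and
     the width includes the sign bit: "ssat ..., #8, ...".  */
  if (arm_sat_operator_match (GEN_INT (-128), GEN_INT (127), &mask, &is_signed))
    gcc_assert (mask == 8 && is_signed);
}
#endif
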
9936 /* Return 1 if memory locations are adjacent. */
9937 int
9938 adjacent_mem_locations (rtx a, rtx b)
9939 {
9940 /* We don't guarantee to preserve the order of these memory refs. */
9941 if (volatile_refs_p (a) || volatile_refs_p (b))
9942 return 0;
9943
9944 if ((REG_P (XEXP (a, 0))
9945 || (GET_CODE (XEXP (a, 0)) == PLUS
9946 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
9947 && (REG_P (XEXP (b, 0))
9948 || (GET_CODE (XEXP (b, 0)) == PLUS
9949 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
9950 {
9951 HOST_WIDE_INT val0 = 0, val1 = 0;
9952 rtx reg0, reg1;
9953 int val_diff;
9954
9955 if (GET_CODE (XEXP (a, 0)) == PLUS)
9956 {
9957 reg0 = XEXP (XEXP (a, 0), 0);
9958 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9959 }
9960 else
9961 reg0 = XEXP (a, 0);
9962
9963 if (GET_CODE (XEXP (b, 0)) == PLUS)
9964 {
9965 reg1 = XEXP (XEXP (b, 0), 0);
9966 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9967 }
9968 else
9969 reg1 = XEXP (b, 0);
9970
9971 /* Don't accept any offset that will require multiple
9972 instructions to handle, since this would cause the
9973 arith_adjacentmem pattern to output an overlong sequence. */
9974 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9975 return 0;
9976
9977 /* Don't allow an eliminable register: register elimination can make
9978 the offset too large. */
9979 if (arm_eliminable_register (reg0))
9980 return 0;
9981
9982 val_diff = val1 - val0;
9983
9984 if (arm_ld_sched)
9985 {
9986 /* If the target has load delay slots, then there's no benefit
9987 to using an ldm instruction unless the offset is zero and
9988 we are optimizing for size. */
9989 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9990 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9991 && (val_diff == 4 || val_diff == -4));
9992 }
9993
9994 return ((REGNO (reg0) == REGNO (reg1))
9995 && (val_diff == 4 || val_diff == -4));
9996 }
9997
9998 return 0;
9999 }
10000
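/* For example (illustrative only): (mem (plus (reg r4) (const_int 4))) and
   (mem (plus (reg r4) (const_int 8))) are adjacent -- same base register
   and offsets differing by exactly 4 -- provided r4 is not an eliminable
   register; on load-scheduled cores the pair is additionally accepted only
   when optimizing for size and one of the offsets is 0 or 4.  */
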
10001 /* Return true if OP is a valid load or store multiple operation. LOAD is true
10002 for load operations, false for store operations. CONSECUTIVE is true
10003 if the register numbers in the operation must be consecutive in the register
10004 bank. RETURN_PC is true if the value is to be loaded into the PC.
10005 The pattern we are trying to match for load is:
10006 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
10007 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
10008 :
10009 :
10010 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
10011 ]
10012 where
10013 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
10014 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
10015 3. If consecutive is TRUE, then for kth register being loaded,
10016 REGNO (R_dk) = REGNO (R_d0) + k.
10017 The pattern for store is similar. */
10018 bool
10019 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
10020 bool consecutive, bool return_pc)
10021 {
10022 HOST_WIDE_INT count = XVECLEN (op, 0);
10023 rtx reg, mem, addr;
10024 unsigned regno;
10025 unsigned first_regno;
10026 HOST_WIDE_INT i = 1, base = 0, offset = 0;
10027 rtx elt;
10028 bool addr_reg_in_reglist = false;
10029 bool update = false;
10030 int reg_increment;
10031 int offset_adj;
10032 int regs_per_val;
10033
10034 /* If not in SImode, then registers must be consecutive
10035 (e.g., VLDM instructions for DFmode). */
10036 gcc_assert ((mode == SImode) || consecutive);
10037 /* Setting return_pc for stores is illegal. */
10038 gcc_assert (!return_pc || load);
10039
10040 /* Set up the increments and the regs per val based on the mode. */
10041 reg_increment = GET_MODE_SIZE (mode);
10042 regs_per_val = reg_increment / 4;
10043 offset_adj = return_pc ? 1 : 0;
10044
10045 if (count <= 1
10046 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
10047 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
10048 return false;
10049
10050 /* Check if this is a write-back. */
10051 elt = XVECEXP (op, 0, offset_adj);
10052 if (GET_CODE (SET_SRC (elt)) == PLUS)
10053 {
10054 i++;
10055 base = 1;
10056 update = true;
10057
10058 /* The offset adjustment must be the number of registers being
10059 popped times the size of a single register. */
10060 if (!REG_P (SET_DEST (elt))
10061 || !REG_P (XEXP (SET_SRC (elt), 0))
10062 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
10063 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
10064 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
10065 ((count - 1 - offset_adj) * reg_increment))
10066 return false;
10067 }
10068
10069 i = i + offset_adj;
10070 base = base + offset_adj;
10071 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
10072 success depends on the type: VLDM can do just one reg,
10073 LDM must do at least two. */
10074 if ((count <= i) && (mode == SImode))
10075 return false;
10076
10077 elt = XVECEXP (op, 0, i - 1);
10078 if (GET_CODE (elt) != SET)
10079 return false;
10080
10081 if (load)
10082 {
10083 reg = SET_DEST (elt);
10084 mem = SET_SRC (elt);
10085 }
10086 else
10087 {
10088 reg = SET_SRC (elt);
10089 mem = SET_DEST (elt);
10090 }
10091
10092 if (!REG_P (reg) || !MEM_P (mem))
10093 return false;
10094
10095 regno = REGNO (reg);
10096 first_regno = regno;
10097 addr = XEXP (mem, 0);
10098 if (GET_CODE (addr) == PLUS)
10099 {
10100 if (!CONST_INT_P (XEXP (addr, 1)))
10101 return false;
10102
10103 offset = INTVAL (XEXP (addr, 1));
10104 addr = XEXP (addr, 0);
10105 }
10106
10107 if (!REG_P (addr))
10108 return false;
10109
10110 /* Don't allow SP to be loaded unless it is also the base register. It
10111 guarantees that SP is reset correctly when an LDM instruction
10112 is interrupted. Otherwise, we might end up with a corrupt stack. */
10113 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10114 return false;
10115
10116 for (; i < count; i++)
10117 {
10118 elt = XVECEXP (op, 0, i);
10119 if (GET_CODE (elt) != SET)
10120 return false;
10121
10122 if (load)
10123 {
10124 reg = SET_DEST (elt);
10125 mem = SET_SRC (elt);
10126 }
10127 else
10128 {
10129 reg = SET_SRC (elt);
10130 mem = SET_DEST (elt);
10131 }
10132
10133 if (!REG_P (reg)
10134 || GET_MODE (reg) != mode
10135 || REGNO (reg) <= regno
10136 || (consecutive
10137 && (REGNO (reg) !=
10138 (unsigned int) (first_regno + regs_per_val * (i - base))))
10139 /* Don't allow SP to be loaded unless it is also the base register. It
10140 guarantees that SP is reset correctly when an LDM instruction
10141 is interrupted. Otherwise, we might end up with a corrupt stack. */
10142 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10143 || !MEM_P (mem)
10144 || GET_MODE (mem) != mode
10145 || ((GET_CODE (XEXP (mem, 0)) != PLUS
10146 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
10147 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
10148 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
10149 offset + (i - base) * reg_increment))
10150 && (!REG_P (XEXP (mem, 0))
10151 || offset + (i - base) * reg_increment != 0)))
10152 return false;
10153
10154 regno = REGNO (reg);
10155 if (regno == REGNO (addr))
10156 addr_reg_in_reglist = true;
10157 }
10158
10159 if (load)
10160 {
10161 if (update && addr_reg_in_reglist)
10162 return false;
10163
10164 /* For Thumb-1, the address register is always modified - either by
10165 write-back or by an explicit load. If the pattern does not describe an
10166 update, then the address register must be in the list of loaded registers. */
10167 if (TARGET_THUMB1)
10168 return update || addr_reg_in_reglist;
10169 }
10170
10171 return true;
10172 }
10173
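/* Example (illustrative only) of a load pattern accepted for SImode with
   write-back, corresponding to "ldmia r0!, {r1, r2}":

     (parallel [(set (reg:SI r0) (plus:SI (reg:SI r0) (const_int 8)))
                (set (reg:SI r1) (mem:SI (reg:SI r0)))
                (set (reg:SI r2) (mem:SI (plus:SI (reg:SI r0)
                                                  (const_int 4))))])

   The first element is recognised as the base-register update (two
   registers times 4 bytes = 8); the remaining SETs load strictly ascending
   register numbers from offsets that ascend by 4.  */
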
10174 /* Return true iff it would be profitable to turn a sequence of NOPS loads
10175 or stores (depending on IS_STORE) into a load-multiple or store-multiple
10176 instruction. ADD_OFFSET is nonzero if the base address register needs
10177 to be modified with an add instruction before we can use it. */
10178
10179 static bool
10180 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
10181 int nops, HOST_WIDE_INT add_offset)
10182 {
10183 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
10184 if the offset isn't small enough. The reason 2 ldrs are faster
10185 is because these ARMs are able to do more than one cache access
10186 in a single cycle. The ARM9 and StrongARM have Harvard caches,
10187 whilst the ARM8 has a double bandwidth cache. This means that
10188 these cores can do both an instruction fetch and a data fetch in
10189 a single cycle, so the trick of calculating the address into a
10190 scratch register (one of the result regs) and then doing a load
10191 multiple actually becomes slower (and no smaller in code size).
10192 That is the transformation
10193
10194 ldr rd1, [rbase + offset]
10195 ldr rd2, [rbase + offset + 4]
10196
10197 to
10198
10199 add rd1, rbase, offset
10200 ldmia rd1, {rd1, rd2}
10201
10202 produces worse code -- '3 cycles + any stalls on rd2' instead of
10203 '2 cycles + any stalls on rd2'. On ARMs with only one cache
10204 access per cycle, the first sequence could never complete in less
10205 than 6 cycles, whereas the ldm sequence would only take 5 and
10206 would make better use of sequential accesses if not hitting the
10207 cache.
10208
10209 We cheat here and test 'arm_ld_sched' which we currently know to
10210 only be true for the ARM8, ARM9 and StrongARM. If this ever
10211 changes, then the test below needs to be reworked. */
10212 if (nops == 2 && arm_ld_sched && add_offset != 0)
10213 return false;
10214
10215 /* XScale has load-store double instructions, but they have stricter
10216 alignment requirements than load-store multiple, so we cannot
10217 use them.
10218
10219 For XScale ldm requires 2 + NREGS cycles to complete and blocks
10220 the pipeline until completion.
10221
10222 NREGS CYCLES
10223 1 3
10224 2 4
10225 3 5
10226 4 6
10227
10228 An ldr instruction takes 1-3 cycles, but does not block the
10229 pipeline.
10230
10231 NREGS CYCLES
10232 1 1-3
10233 2 2-6
10234 3 3-9
10235 4 4-12
10236
10237 Best case ldr will always win. However, the more ldr instructions
10238 we issue, the less likely we are to be able to schedule them well.
10239 Using ldr instructions also increases code size.
10240
10241 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10242 for counts of 3 or 4 regs. */
10243 if (nops <= 2 && arm_tune_xscale && !optimize_size)
10244 return false;
10245 return true;
10246 }
10247
10248 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10249 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10250 an array ORDER which describes the sequence to use when accessing the
10251 offsets that produces an ascending order. In this sequence, each
10252 offset must be larger by exactly 4 than the previous one. ORDER[0]
10253 must have been filled in with the lowest offset by the caller.
10254 If UNSORTED_REGS is nonnull, it is an array of register numbers that
10255 we use to verify that ORDER produces an ascending order of registers.
10256 Return true if it was possible to construct such an order, false if
10257 not. */
10258
10259 static bool
10260 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
10261 int *unsorted_regs)
10262 {
10263 int i;
10264 for (i = 1; i < nops; i++)
10265 {
10266 int j;
10267
10268 order[i] = order[i - 1];
10269 for (j = 0; j < nops; j++)
10270 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
10271 {
10272 /* We must find exactly one offset that is higher than the
10273 previous one by 4. */
10274 if (order[i] != order[i - 1])
10275 return false;
10276 order[i] = j;
10277 }
10278 if (order[i] == order[i - 1])
10279 return false;
10280 /* The register numbers must be ascending. */
10281 if (unsorted_regs != NULL
10282 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
10283 return false;
10284 }
10285 return true;
10286 }
10287
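/* Worked example (illustrative only): with UNSORTED_OFFSETS = {8, 0, 4, 12}
   and ORDER[0] preset to 1 (the index of the lowest offset), the loop finds
   offset 4 at index 2, then 8 at index 0, then 12 at index 3, giving
   ORDER = {1, 2, 0, 3}.  A gap in the offsets (e.g. {0, 4, 12, 16}) or a
   duplicated offset makes the check fail and the function returns false.  */
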
10288 /* Used to determine in a peephole whether a sequence of load
10289 instructions can be changed into a load-multiple instruction.
10290 NOPS is the number of separate load instructions we are examining. The
10291 first NOPS entries in OPERANDS are the destination registers, the
10292 next NOPS entries are memory operands. If this function is
10293 successful, *BASE is set to the common base register of the memory
10294 accesses; *LOAD_OFFSET is set to the first memory location's offset
10295 from that base register.
10296 REGS is an array filled in with the destination register numbers.
10297 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
10298 insn numbers to an ascending order of stores. If CHECK_REGS is true,
10299 the sequence of registers in REGS matches the loads from ascending memory
10300 locations, and the function verifies that the register numbers are
10301 themselves ascending. If CHECK_REGS is false, the register numbers
10302 are stored in the order they are found in the operands. */
10303 static int
10304 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
10305 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
10306 {
10307 int unsorted_regs[MAX_LDM_STM_OPS];
10308 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10309 int order[MAX_LDM_STM_OPS];
10310 rtx base_reg_rtx = NULL;
10311 int base_reg = -1;
10312 int i, ldm_case;
10313
10314 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10315 easily extended if required. */
10316 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10317
10318 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10319
10320 /* Loop over the operands and check that the memory references are
10321 suitable (i.e. immediate offsets from the same base register). At
10322 the same time, extract the target register, and the memory
10323 offsets. */
10324 for (i = 0; i < nops; i++)
10325 {
10326 rtx reg;
10327 rtx offset;
10328
10329 /* Convert a subreg of a mem into the mem itself. */
10330 if (GET_CODE (operands[nops + i]) == SUBREG)
10331 operands[nops + i] = alter_subreg (operands + (nops + i));
10332
10333 gcc_assert (MEM_P (operands[nops + i]));
10334
10335 /* Don't reorder volatile memory references; it doesn't seem worth
10336 looking for the case where the order is ok anyway. */
10337 if (MEM_VOLATILE_P (operands[nops + i]))
10338 return 0;
10339
10340 offset = const0_rtx;
10341
10342 if ((REG_P (reg = XEXP (operands[nops + i], 0))
10343 || (GET_CODE (reg) == SUBREG
10344 && REG_P (reg = SUBREG_REG (reg))))
10345 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10346 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
10347 || (GET_CODE (reg) == SUBREG
10348 && REG_P (reg = SUBREG_REG (reg))))
10349 && (CONST_INT_P (offset
10350 = XEXP (XEXP (operands[nops + i], 0), 1)))))
10351 {
10352 if (i == 0)
10353 {
10354 base_reg = REGNO (reg);
10355 base_reg_rtx = reg;
10356 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10357 return 0;
10358 }
10359 else if (base_reg != (int) REGNO (reg))
10360 /* Not addressed from the same base register. */
10361 return 0;
10362
10363 unsorted_regs[i] = (REG_P (operands[i])
10364 ? REGNO (operands[i])
10365 : REGNO (SUBREG_REG (operands[i])));
10366
10367 /* If it isn't an integer register, or if it overwrites the
10368 base register but isn't the last insn in the list, then
10369 we can't do this. */
10370 if (unsorted_regs[i] < 0
10371 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10372 || unsorted_regs[i] > 14
10373 || (i != nops - 1 && unsorted_regs[i] == base_reg))
10374 return 0;
10375
10376 unsorted_offsets[i] = INTVAL (offset);
10377 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10378 order[0] = i;
10379 }
10380 else
10381 /* Not a suitable memory address. */
10382 return 0;
10383 }
10384
10385 /* All the useful information has now been extracted from the
10386 operands into unsorted_regs and unsorted_offsets; additionally,
10387 order[0] has been set to the lowest offset in the list. Sort
10388 the offsets into order, verifying that they are adjacent, and
10389 check that the register numbers are ascending. */
10390 if (!compute_offset_order (nops, unsorted_offsets, order,
10391 check_regs ? unsorted_regs : NULL))
10392 return 0;
10393
10394 if (saved_order)
10395 memcpy (saved_order, order, sizeof order);
10396
10397 if (base)
10398 {
10399 *base = base_reg;
10400
10401 for (i = 0; i < nops; i++)
10402 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10403
10404 *load_offset = unsorted_offsets[order[0]];
10405 }
10406
10407 if (TARGET_THUMB1
10408 && !peep2_reg_dead_p (nops, base_reg_rtx))
10409 return 0;
10410
10411 if (unsorted_offsets[order[0]] == 0)
10412 ldm_case = 1; /* ldmia */
10413 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10414 ldm_case = 2; /* ldmib */
10415 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10416 ldm_case = 3; /* ldmda */
10417 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10418 ldm_case = 4; /* ldmdb */
10419 else if (const_ok_for_arm (unsorted_offsets[order[0]])
10420 || const_ok_for_arm (-unsorted_offsets[order[0]]))
10421 ldm_case = 5;
10422 else
10423 return 0;
10424
10425 if (!multiple_operation_profitable_p (false, nops,
10426 ldm_case == 5
10427 ? unsorted_offsets[order[0]] : 0))
10428 return 0;
10429
10430 return ldm_case;
10431 }
10432
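/* For example (illustrative only): four loads from [rb], [rb, #4],
   [rb, #8] and [rb, #12] into ascending registers give ldm_case 1 (ldmia);
   offsets starting at #4 give case 2 (ldmib, ARM state only); offsets
   ending at #0 or #-4 give the ldmda/ldmdb cases; any other common offset
   that is (or whose negation is) a valid ARM add immediate gives case 5,
   in which the caller must first add the offset into a scratch register.  */
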
10433 /* Used to determine in a peephole whether a sequence of store instructions can
10434 be changed into a store-multiple instruction.
10435 NOPS is the number of separate store instructions we are examining.
10436 NOPS_TOTAL is the total number of instructions recognized by the peephole
10437 pattern.
10438 The first NOPS entries in OPERANDS are the source registers, the next
10439 NOPS entries are memory operands. If this function is successful, *BASE is
10440 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10441 to the first memory location's offset from that base register. REGS is an
10442 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10443 likewise filled with the corresponding rtx's.
10444 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
10445 numbers to an ascending order of stores.
10446 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10447 from ascending memory locations, and the function verifies that the register
10448 numbers are themselves ascending. If CHECK_REGS is false, the register
10449 numbers are stored in the order they are found in the operands. */
10450 static int
10451 store_multiple_sequence (rtx *operands, int nops, int nops_total,
10452 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10453 HOST_WIDE_INT *load_offset, bool check_regs)
10454 {
10455 int unsorted_regs[MAX_LDM_STM_OPS];
10456 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10457 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10458 int order[MAX_LDM_STM_OPS];
10459 int base_reg = -1;
10460 rtx base_reg_rtx = NULL;
10461 int i, stm_case;
10462
10463 /* Write-back of the base register is currently only supported for Thumb-1. */
10464 int base_writeback = TARGET_THUMB1;
10465
10466 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10467 easily extended if required. */
10468 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10469
10470 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10471
10472 /* Loop over the operands and check that the memory references are
10473 suitable (i.e. immediate offsets from the same base register). At
10474 the same time, extract the target register, and the memory
10475 offsets. */
10476 for (i = 0; i < nops; i++)
10477 {
10478 rtx reg;
10479 rtx offset;
10480
10481 /* Convert a subreg of a mem into the mem itself. */
10482 if (GET_CODE (operands[nops + i]) == SUBREG)
10483 operands[nops + i] = alter_subreg (operands + (nops + i));
10484
10485 gcc_assert (MEM_P (operands[nops + i]));
10486
10487 /* Don't reorder volatile memory references; it doesn't seem worth
10488 looking for the case where the order is ok anyway. */
10489 if (MEM_VOLATILE_P (operands[nops + i]))
10490 return 0;
10491
10492 offset = const0_rtx;
10493
10494 if ((REG_P (reg = XEXP (operands[nops + i], 0))
10495 || (GET_CODE (reg) == SUBREG
10496 && REG_P (reg = SUBREG_REG (reg))))
10497 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10498 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
10499 || (GET_CODE (reg) == SUBREG
10500 && REG_P (reg = SUBREG_REG (reg))))
10501 && (CONST_INT_P (offset
10502 = XEXP (XEXP (operands[nops + i], 0), 1)))))
10503 {
10504 unsorted_reg_rtxs[i] = (REG_P (operands[i])
10505 ? operands[i] : SUBREG_REG (operands[i]));
10506 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10507
10508 if (i == 0)
10509 {
10510 base_reg = REGNO (reg);
10511 base_reg_rtx = reg;
10512 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10513 return 0;
10514 }
10515 else if (base_reg != (int) REGNO (reg))
10516 /* Not addressed from the same base register. */
10517 return 0;
10518
10519 /* If it isn't an integer register, then we can't do this. */
10520 if (unsorted_regs[i] < 0
10521 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10522 /* The effects are unpredictable if the base register is
10523 both updated and stored. */
10524 || (base_writeback && unsorted_regs[i] == base_reg)
10525 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
10526 || unsorted_regs[i] > 14)
10527 return 0;
10528
10529 unsorted_offsets[i] = INTVAL (offset);
10530 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10531 order[0] = i;
10532 }
10533 else
10534 /* Not a suitable memory address. */
10535 return 0;
10536 }
10537
10538 /* All the useful information has now been extracted from the
10539 operands into unsorted_regs and unsorted_offsets; additionally,
10540 order[0] has been set to the lowest offset in the list. Sort
10541 the offsets into order, verifying that they are adjacent, and
10542 check that the register numbers are ascending. */
10543 if (!compute_offset_order (nops, unsorted_offsets, order,
10544 check_regs ? unsorted_regs : NULL))
10545 return 0;
10546
10547 if (saved_order)
10548 memcpy (saved_order, order, sizeof order);
10549
10550 if (base)
10551 {
10552 *base = base_reg;
10553
10554 for (i = 0; i < nops; i++)
10555 {
10556 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10557 if (reg_rtxs)
10558 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
10559 }
10560
10561 *load_offset = unsorted_offsets[order[0]];
10562 }
10563
10564 if (TARGET_THUMB1
10565 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
10566 return 0;
10567
10568 if (unsorted_offsets[order[0]] == 0)
10569 stm_case = 1; /* stmia */
10570 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10571 stm_case = 2; /* stmib */
10572 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10573 stm_case = 3; /* stmda */
10574 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10575 stm_case = 4; /* stmdb */
10576 else
10577 return 0;
10578
10579 if (!multiple_operation_profitable_p (false, nops, 0))
10580 return 0;
10581
10582 return stm_case;
10583 }
10584 \f
10585 /* Routines for use in generating RTL. */
10586
10587 /* Generate a load-multiple instruction. COUNT is the number of loads in
10588 the instruction; REGS and MEMS are arrays containing the operands.
10589 BASEREG is the base register to be used in addressing the memory operands.
10590 WBACK_OFFSET is nonzero if the instruction should update the base
10591 register. */
10592
10593 static rtx
10594 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10595 HOST_WIDE_INT wback_offset)
10596 {
10597 int i = 0, j;
10598 rtx result;
10599
10600 if (!multiple_operation_profitable_p (false, count, 0))
10601 {
10602 rtx seq;
10603
10604 start_sequence ();
10605
10606 for (i = 0; i < count; i++)
10607 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
10608
10609 if (wback_offset != 0)
10610 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
10611
10612 seq = get_insns ();
10613 end_sequence ();
10614
10615 return seq;
10616 }
10617
10618 result = gen_rtx_PARALLEL (VOIDmode,
10619 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10620 if (wback_offset != 0)
10621 {
10622 XVECEXP (result, 0, 0)
10623 = gen_rtx_SET (VOIDmode, basereg,
10624 plus_constant (Pmode, basereg, wback_offset));
10625 i = 1;
10626 count++;
10627 }
10628
10629 for (j = 0; i < count; i++, j++)
10630 XVECEXP (result, 0, i)
10631 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
10632
10633 return result;
10634 }
10635
10636 /* Generate a store-multiple instruction. COUNT is the number of stores in
10637 the instruction; REGS and MEMS are arrays containing the operands.
10638 BASEREG is the base register to be used in addressing the memory operands.
10639 WBACK_OFFSET is nonzero if the instruction should update the base
10640 register. */
10641
10642 static rtx
10643 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10644 HOST_WIDE_INT wback_offset)
10645 {
10646 int i = 0, j;
10647 rtx result;
10648
10649 if (GET_CODE (basereg) == PLUS)
10650 basereg = XEXP (basereg, 0);
10651
10652 if (!multiple_operation_profitable_p (false, count, 0))
10653 {
10654 rtx seq;
10655
10656 start_sequence ();
10657
10658 for (i = 0; i < count; i++)
10659 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
10660
10661 if (wback_offset != 0)
10662 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
10663
10664 seq = get_insns ();
10665 end_sequence ();
10666
10667 return seq;
10668 }
10669
10670 result = gen_rtx_PARALLEL (VOIDmode,
10671 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10672 if (wback_offset != 0)
10673 {
10674 XVECEXP (result, 0, 0)
10675 = gen_rtx_SET (VOIDmode, basereg,
10676 plus_constant (Pmode, basereg, wback_offset));
10677 i = 1;
10678 count++;
10679 }
10680
10681 for (j = 0; i < count; i++, j++)
10682 XVECEXP (result, 0, i)
10683 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
10684
10685 return result;
10686 }
10687
10688 /* Generate either a load-multiple or a store-multiple instruction. This
10689 function can be used in situations where we can start with a single MEM
10690 rtx and adjust its address upwards.
10691 COUNT is the number of operations in the instruction, not counting a
10692 possible update of the base register. REGS is an array containing the
10693 register operands.
10694 BASEREG is the base register to be used in addressing the memory operands,
10695 which are constructed from BASEMEM.
10696 WRITE_BACK specifies whether the generated instruction should include an
10697 update of the base register.
10698 OFFSETP is used to pass an offset to and from this function; this offset
10699 is not used when constructing the address (instead BASEMEM should have an
10700 appropriate offset in its address), it is used only for setting
10701 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
10702
10703 static rtx
10704 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
10705 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10706 {
10707 rtx mems[MAX_LDM_STM_OPS];
10708 HOST_WIDE_INT offset = *offsetp;
10709 int i;
10710
10711 gcc_assert (count <= MAX_LDM_STM_OPS);
10712
10713 if (GET_CODE (basereg) == PLUS)
10714 basereg = XEXP (basereg, 0);
10715
10716 for (i = 0; i < count; i++)
10717 {
10718 rtx addr = plus_constant (Pmode, basereg, i * 4);
10719 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10720 offset += 4;
10721 }
10722
10723 if (write_back)
10724 *offsetp = offset;
10725
10726 if (is_load)
10727 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10728 write_back ? 4 * count : 0);
10729 else
10730 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10731 write_back ? 4 * count : 0);
10732 }
10733
10734 rtx
10735 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10736 rtx basemem, HOST_WIDE_INT *offsetp)
10737 {
10738 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10739 offsetp);
10740 }
10741
10742 rtx
10743 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10744 rtx basemem, HOST_WIDE_INT *offsetp)
10745 {
10746 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10747 offsetp);
10748 }
10749
10750 /* Called from a peephole2 expander to turn a sequence of loads into an
10751 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10752 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10753 is true if we can reorder the registers because their subsequent uses are
10754 commutative.
10755 Returns true iff we could generate a new instruction. */
10756
10757 bool
10758 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10759 {
10760 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10761 rtx mems[MAX_LDM_STM_OPS];
10762 int i, j, base_reg;
10763 rtx base_reg_rtx;
10764 HOST_WIDE_INT offset;
10765 int write_back = FALSE;
10766 int ldm_case;
10767 rtx addr;
10768
10769 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10770 &base_reg, &offset, !sort_regs);
10771
10772 if (ldm_case == 0)
10773 return false;
10774
10775 if (sort_regs)
10776 for (i = 0; i < nops - 1; i++)
10777 for (j = i + 1; j < nops; j++)
10778 if (regs[i] > regs[j])
10779 {
10780 int t = regs[i];
10781 regs[i] = regs[j];
10782 regs[j] = t;
10783 }
10784 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10785
10786 if (TARGET_THUMB1)
10787 {
10788 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10789 gcc_assert (ldm_case == 1 || ldm_case == 5);
10790 write_back = TRUE;
10791 }
10792
10793 if (ldm_case == 5)
10794 {
10795 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10796 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10797 offset = 0;
10798 if (!TARGET_THUMB1)
10799 {
10800 base_reg = regs[0];
10801 base_reg_rtx = newbase;
10802 }
10803 }
10804
10805 for (i = 0; i < nops; i++)
10806 {
10807 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
10808 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10809 SImode, addr, 0);
10810 }
10811 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10812 write_back ? offset + i * 4 : 0));
10813 return true;
10814 }
10815
10816 /* Called from a peephole2 expander to turn a sequence of stores into an
10817 STM instruction. OPERANDS are the operands found by the peephole matcher;
10818 NOPS indicates how many separate stores we are trying to combine.
10819 Returns true iff we could generate a new instruction. */
10820
10821 bool
10822 gen_stm_seq (rtx *operands, int nops)
10823 {
10824 int i;
10825 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10826 rtx mems[MAX_LDM_STM_OPS];
10827 int base_reg;
10828 rtx base_reg_rtx;
10829 HOST_WIDE_INT offset;
10830 int write_back = FALSE;
10831 int stm_case;
10832 rtx addr;
10833 bool base_reg_dies;
10834
10835 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10836 mem_order, &base_reg, &offset, true);
10837
10838 if (stm_case == 0)
10839 return false;
10840
10841 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10842
10843 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10844 if (TARGET_THUMB1)
10845 {
10846 gcc_assert (base_reg_dies);
10847 write_back = TRUE;
10848 }
10849
10850 if (stm_case == 5)
10851 {
10852 gcc_assert (base_reg_dies);
10853 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10854 offset = 0;
10855 }
10856
10857 addr = plus_constant (Pmode, base_reg_rtx, offset);
10858
10859 for (i = 0; i < nops; i++)
10860 {
10861 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
10862 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10863 SImode, addr, 0);
10864 }
10865 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10866 write_back ? offset + i * 4 : 0));
10867 return true;
10868 }
10869
10870 /* Called from a peephole2 expander to turn a sequence of stores that are
10871 preceded by constant loads into an STM instruction. OPERANDS are the
10872 operands found by the peephole matcher; NOPS indicates how many
10873 separate stores we are trying to combine; there are 2 * NOPS
10874 instructions in the peephole.
10875 Returns true iff we could generate a new instruction. */
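
/* For example (purely illustrative), a matched sequence of the form

      mov r0, #1
      str r0, [r4]
      mov r0, #2
      str r0, [r4, #4]

   reuses r0, so a free register is allocated for one of the constants and
   the sequence can become

      mov r0, #1
      mov r1, #2
      stmia r4, {r0, r1}

   The register names are assumptions made for the sake of the example; the
   real choices come from the peephole operands and
   peep2_find_free_register.  */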
10876
10877 bool
10878 gen_const_stm_seq (rtx *operands, int nops)
10879 {
10880 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10881 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10882 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10883 rtx mems[MAX_LDM_STM_OPS];
10884 int base_reg;
10885 rtx base_reg_rtx;
10886 HOST_WIDE_INT offset;
10887 int write_back = FALSE;
10888 int stm_case;
10889 rtx addr;
10890 bool base_reg_dies;
10891 int i, j;
10892 HARD_REG_SET allocated;
10893
10894 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10895 mem_order, &base_reg, &offset, false);
10896
10897 if (stm_case == 0)
10898 return false;
10899
10900 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10901
10902 /* If the same register is used more than once, try to find a free
10903 register. */
10904 CLEAR_HARD_REG_SET (allocated);
10905 for (i = 0; i < nops; i++)
10906 {
10907 for (j = i + 1; j < nops; j++)
10908 if (regs[i] == regs[j])
10909 {
10910 rtx t = peep2_find_free_register (0, nops * 2,
10911 TARGET_THUMB1 ? "l" : "r",
10912 SImode, &allocated);
10913 if (t == NULL_RTX)
10914 return false;
10915 reg_rtxs[i] = t;
10916 regs[i] = REGNO (t);
10917 }
10918 }
10919
10920 /* Compute an ordering that maps the register numbers to an ascending
10921 sequence. */
10922 reg_order[0] = 0;
10923 for (i = 0; i < nops; i++)
10924 if (regs[i] < regs[reg_order[0]])
10925 reg_order[0] = i;
10926
10927 for (i = 1; i < nops; i++)
10928 {
10929 int this_order = reg_order[i - 1];
10930 for (j = 0; j < nops; j++)
10931 if (regs[j] > regs[reg_order[i - 1]]
10932 && (this_order == reg_order[i - 1]
10933 || regs[j] < regs[this_order]))
10934 this_order = j;
10935 reg_order[i] = this_order;
10936 }
10937
10938 /* Ensure that registers that must be live after the instruction end
10939 up with the correct value. */
10940 for (i = 0; i < nops; i++)
10941 {
10942 int this_order = reg_order[i];
10943 if ((this_order != mem_order[i]
10944 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10945 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10946 return false;
10947 }
10948
10949 /* Load the constants. */
10950 for (i = 0; i < nops; i++)
10951 {
10952 rtx op = operands[2 * nops + mem_order[i]];
10953 sorted_regs[i] = regs[reg_order[i]];
10954 emit_move_insn (reg_rtxs[reg_order[i]], op);
10955 }
10956
10957 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10958
10959 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10960 if (TARGET_THUMB1)
10961 {
10962 gcc_assert (base_reg_dies);
10963 write_back = TRUE;
10964 }
10965
10966 if (stm_case == 5)
10967 {
10968 gcc_assert (base_reg_dies);
10969 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10970 offset = 0;
10971 }
10972
10973 addr = plus_constant (Pmode, base_reg_rtx, offset);
10974
10975 for (i = 0; i < nops; i++)
10976 {
10977 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
10978 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10979 SImode, addr, 0);
10980 }
10981 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10982 write_back ? offset + i * 4 : 0));
10983 return true;
10984 }
10985
10986 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
10987 unaligned copies on processors which support unaligned semantics for those
10988 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
10989 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
10990 An interleave factor of 1 (the minimum) will perform no interleaving.
10991 Load/store multiple are used for aligned addresses where possible. */
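
/* For instance (an illustrative sketch, not literal output), with an
   interleave factor of 2 and an unaligned source and destination, each
   8-byte chunk of the copy is expanded along the lines of

      ldr r0, [src]          @ unaligned load
      ldr r1, [src, #4]
      str r0, [dst]          @ unaligned store
      str r1, [dst, #4]

   whereas an aligned source or destination uses ldmia/stmia in place of the
   individual loads or stores.  */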
10992
10993 static void
10994 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
10995 HOST_WIDE_INT length,
10996 unsigned int interleave_factor)
10997 {
10998 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
10999 int *regnos = XALLOCAVEC (int, interleave_factor);
11000 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
11001 HOST_WIDE_INT i, j;
11002 HOST_WIDE_INT remaining = length, words;
11003 rtx halfword_tmp = NULL, byte_tmp = NULL;
11004 rtx dst, src;
11005 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
11006 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
11007 HOST_WIDE_INT srcoffset, dstoffset;
11008 HOST_WIDE_INT src_autoinc, dst_autoinc;
11009 rtx mem, addr;
11010
11011 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
11012
11013 /* Use hard registers if we have aligned source or destination so we can use
11014 load/store multiple with contiguous registers. */
11015 if (dst_aligned || src_aligned)
11016 for (i = 0; i < interleave_factor; i++)
11017 regs[i] = gen_rtx_REG (SImode, i);
11018 else
11019 for (i = 0; i < interleave_factor; i++)
11020 regs[i] = gen_reg_rtx (SImode);
11021
11022 dst = copy_addr_to_reg (XEXP (dstbase, 0));
11023 src = copy_addr_to_reg (XEXP (srcbase, 0));
11024
11025 srcoffset = dstoffset = 0;
11026
11027 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
11028 For copying the last bytes we want to subtract this offset again. */
11029 src_autoinc = dst_autoinc = 0;
11030
11031 for (i = 0; i < interleave_factor; i++)
11032 regnos[i] = i;
11033
11034 /* Copy BLOCK_SIZE_BYTES chunks. */
11035
11036 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
11037 {
11038 /* Load words. */
11039 if (src_aligned && interleave_factor > 1)
11040 {
11041 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
11042 TRUE, srcbase, &srcoffset));
11043 src_autoinc += UNITS_PER_WORD * interleave_factor;
11044 }
11045 else
11046 {
11047 for (j = 0; j < interleave_factor; j++)
11048 {
11049 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
11050 - src_autoinc));
11051 mem = adjust_automodify_address (srcbase, SImode, addr,
11052 srcoffset + j * UNITS_PER_WORD);
11053 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11054 }
11055 srcoffset += block_size_bytes;
11056 }
11057
11058 /* Store words. */
11059 if (dst_aligned && interleave_factor > 1)
11060 {
11061 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
11062 TRUE, dstbase, &dstoffset));
11063 dst_autoinc += UNITS_PER_WORD * interleave_factor;
11064 }
11065 else
11066 {
11067 for (j = 0; j < interleave_factor; j++)
11068 {
11069 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
11070 - dst_autoinc));
11071 mem = adjust_automodify_address (dstbase, SImode, addr,
11072 dstoffset + j * UNITS_PER_WORD);
11073 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11074 }
11075 dstoffset += block_size_bytes;
11076 }
11077
11078 remaining -= block_size_bytes;
11079 }
11080
11081 /* Copy any whole words left (note these aren't interleaved with any
11082 subsequent halfword/byte load/stores in the interests of simplicity). */
11083
11084 words = remaining / UNITS_PER_WORD;
11085
11086 gcc_assert (words < interleave_factor);
11087
11088 if (src_aligned && words > 1)
11089 {
11090 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
11091 &srcoffset));
11092 src_autoinc += UNITS_PER_WORD * words;
11093 }
11094 else
11095 {
11096 for (j = 0; j < words; j++)
11097 {
11098 addr = plus_constant (Pmode, src,
11099 srcoffset + j * UNITS_PER_WORD - src_autoinc);
11100 mem = adjust_automodify_address (srcbase, SImode, addr,
11101 srcoffset + j * UNITS_PER_WORD);
11102 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11103 }
11104 srcoffset += words * UNITS_PER_WORD;
11105 }
11106
11107 if (dst_aligned && words > 1)
11108 {
11109 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
11110 &dstoffset));
11111 dst_autoinc += words * UNITS_PER_WORD;
11112 }
11113 else
11114 {
11115 for (j = 0; j < words; j++)
11116 {
11117 addr = plus_constant (Pmode, dst,
11118 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
11119 mem = adjust_automodify_address (dstbase, SImode, addr,
11120 dstoffset + j * UNITS_PER_WORD);
11121 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11122 }
11123 dstoffset += words * UNITS_PER_WORD;
11124 }
11125
11126 remaining -= words * UNITS_PER_WORD;
11127
11128 gcc_assert (remaining < 4);
11129
11130 /* Copy a halfword if necessary. */
11131
11132 if (remaining >= 2)
11133 {
11134 halfword_tmp = gen_reg_rtx (SImode);
11135
11136 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11137 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
11138 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
11139
11140 /* Either write out immediately, or delay until we've loaded the last
11141 byte, depending on interleave factor. */
11142 if (interleave_factor == 1)
11143 {
11144 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11145 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11146 emit_insn (gen_unaligned_storehi (mem,
11147 gen_lowpart (HImode, halfword_tmp)));
11148 halfword_tmp = NULL;
11149 dstoffset += 2;
11150 }
11151
11152 remaining -= 2;
11153 srcoffset += 2;
11154 }
11155
11156 gcc_assert (remaining < 2);
11157
11158 /* Copy last byte. */
11159
11160 if ((remaining & 1) != 0)
11161 {
11162 byte_tmp = gen_reg_rtx (SImode);
11163
11164 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11165 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
11166 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
11167
11168 if (interleave_factor == 1)
11169 {
11170 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11171 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11172 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11173 byte_tmp = NULL;
11174 dstoffset++;
11175 }
11176
11177 remaining--;
11178 srcoffset++;
11179 }
11180
11181 /* Store last halfword if we haven't done so already. */
11182
11183 if (halfword_tmp)
11184 {
11185 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11186 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11187 emit_insn (gen_unaligned_storehi (mem,
11188 gen_lowpart (HImode, halfword_tmp)));
11189 dstoffset += 2;
11190 }
11191
11192 /* Likewise for last byte. */
11193
11194 if (byte_tmp)
11195 {
11196 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11197 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11198 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11199 dstoffset++;
11200 }
11201
11202 gcc_assert (remaining == 0 && srcoffset == dstoffset);
11203 }
11204
11205 /* From mips_adjust_block_mem:
11206
11207 Helper function for doing a loop-based block operation on memory
11208 reference MEM. Each iteration of the loop will operate on LENGTH
11209 bytes of MEM.
11210
11211 Create a new base register for use within the loop and point it to
11212 the start of MEM. Create a new memory reference that uses this
11213 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
11214
11215 static void
11216 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
11217 rtx *loop_mem)
11218 {
11219 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
11220
11221 /* Although the new mem does not refer to a known location,
11222 it does keep up to LENGTH bytes of alignment. */
11223 *loop_mem = change_address (mem, BLKmode, *loop_reg);
11224 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
11225 }
11226
11227 /* From mips_block_move_loop:
11228
11229 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
11230 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
11231 the memory regions do not overlap. */
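
/* As a worked example (numbers chosen only for illustration): with
   LENGTH == 70 and BYTES_PER_ITER == 16, LEFTOVER is 6, the loop below
   copies 64 bytes in four iterations, and the remaining 6 bytes are handled
   by the final call to arm_block_move_unaligned_straight.  */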
11232
11233 static void
11234 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
11235 unsigned int interleave_factor,
11236 HOST_WIDE_INT bytes_per_iter)
11237 {
11238 rtx label, src_reg, dest_reg, final_src, test;
11239 HOST_WIDE_INT leftover;
11240
11241 leftover = length % bytes_per_iter;
11242 length -= leftover;
11243
11244 /* Create registers and memory references for use within the loop. */
11245 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
11246 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
11247
11248 /* Calculate the value that SRC_REG should have after the last iteration of
11249 the loop. */
11250 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
11251 0, 0, OPTAB_WIDEN);
11252
11253 /* Emit the start of the loop. */
11254 label = gen_label_rtx ();
11255 emit_label (label);
11256
11257 /* Emit the loop body. */
11258 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
11259 interleave_factor);
11260
11261 /* Move on to the next block. */
11262 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
11263 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
11264
11265 /* Emit the loop condition. */
11266 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
11267 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
11268
11269 /* Mop up any left-over bytes. */
11270 if (leftover)
11271 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
11272 }
11273
11274 /* Emit a block move when either the source or destination is unaligned (not
11275 aligned to a four-byte boundary). This may need further tuning depending on
11276 core type, optimize_size setting, etc. */
11277
11278 static int
11279 arm_movmemqi_unaligned (rtx *operands)
11280 {
11281 HOST_WIDE_INT length = INTVAL (operands[2]);
11282
11283 if (optimize_size)
11284 {
11285 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
11286 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
11287 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
11288 the size of the code when optimizing for size. We still use ldm/stm if
11289 src_aligned or dst_aligned, though, and allow more interleaving in those
11290 cases since the resulting code can be smaller. */
11291 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
11292 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
11293
11294 if (length > 12)
11295 arm_block_move_unaligned_loop (operands[0], operands[1], length,
11296 interleave_factor, bytes_per_iter);
11297 else
11298 arm_block_move_unaligned_straight (operands[0], operands[1], length,
11299 interleave_factor);
11300 }
11301 else
11302 {
11303 /* Note that the loop created by arm_block_move_unaligned_loop may be
11304 subject to loop unrolling, which makes tuning this condition a little
11305 redundant. */
11306 if (length > 32)
11307 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
11308 else
11309 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
11310 }
11311
11312 return 1;
11313 }
11314
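/* Expand a movmemqi pattern.  OPERANDS[0] and OPERANDS[1] are the
   destination and source memory references, OPERANDS[2] is the length and
   OPERANDS[3] the alignment; both of the latter must be CONST_INT.  Only
   lengths of at most 64 bytes are handled inline.  Returns 1 if the copy
   was expanded here, or 0 so that the caller falls back to the default
   expansion.  */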
11315 int
11316 arm_gen_movmemqi (rtx *operands)
11317 {
11318 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
11319 HOST_WIDE_INT srcoffset, dstoffset;
11320 int i;
11321 rtx src, dst, srcbase, dstbase;
11322 rtx part_bytes_reg = NULL;
11323 rtx mem;
11324
11325 if (!CONST_INT_P (operands[2])
11326 || !CONST_INT_P (operands[3])
11327 || INTVAL (operands[2]) > 64)
11328 return 0;
11329
11330 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
11331 return arm_movmemqi_unaligned (operands);
11332
11333 if (INTVAL (operands[3]) & 3)
11334 return 0;
11335
11336 dstbase = operands[0];
11337 srcbase = operands[1];
11338
11339 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
11340 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
11341
11342 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
11343 out_words_to_go = INTVAL (operands[2]) / 4;
11344 last_bytes = INTVAL (operands[2]) & 3;
11345 dstoffset = srcoffset = 0;
11346
11347 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
11348 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
11349
11350 for (i = 0; in_words_to_go >= 2; i+=4)
11351 {
11352 if (in_words_to_go > 4)
11353 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
11354 TRUE, srcbase, &srcoffset));
11355 else
11356 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
11357 src, FALSE, srcbase,
11358 &srcoffset));
11359
11360 if (out_words_to_go)
11361 {
11362 if (out_words_to_go > 4)
11363 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
11364 TRUE, dstbase, &dstoffset));
11365 else if (out_words_to_go != 1)
11366 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
11367 out_words_to_go, dst,
11368 (last_bytes == 0
11369 ? FALSE : TRUE),
11370 dstbase, &dstoffset));
11371 else
11372 {
11373 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11374 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
11375 if (last_bytes != 0)
11376 {
11377 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
11378 dstoffset += 4;
11379 }
11380 }
11381 }
11382
11383 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
11384 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
11385 }
11386
11387 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
11388 if (out_words_to_go)
11389 {
11390 rtx sreg;
11391
11392 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11393 sreg = copy_to_reg (mem);
11394
11395 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11396 emit_move_insn (mem, sreg);
11397 in_words_to_go--;
11398
11399 gcc_assert (!in_words_to_go); /* Sanity check */
11400 }
11401
11402 if (in_words_to_go)
11403 {
11404 gcc_assert (in_words_to_go > 0);
11405
11406 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11407 part_bytes_reg = copy_to_mode_reg (SImode, mem);
11408 }
11409
11410 gcc_assert (!last_bytes || part_bytes_reg);
11411
11412 if (BYTES_BIG_ENDIAN && last_bytes)
11413 {
11414 rtx tmp = gen_reg_rtx (SImode);
11415
11416 /* The bytes we want are in the top end of the word. */
11417 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
11418 GEN_INT (8 * (4 - last_bytes))));
11419 part_bytes_reg = tmp;
11420
11421 while (last_bytes)
11422 {
11423 mem = adjust_automodify_address (dstbase, QImode,
11424 plus_constant (Pmode, dst,
11425 last_bytes - 1),
11426 dstoffset + last_bytes - 1);
11427 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11428
11429 if (--last_bytes)
11430 {
11431 tmp = gen_reg_rtx (SImode);
11432 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
11433 part_bytes_reg = tmp;
11434 }
11435 }
11436
11437 }
11438 else
11439 {
11440 if (last_bytes > 1)
11441 {
11442 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
11443 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
11444 last_bytes -= 2;
11445 if (last_bytes)
11446 {
11447 rtx tmp = gen_reg_rtx (SImode);
11448 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
11449 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
11450 part_bytes_reg = tmp;
11451 dstoffset += 2;
11452 }
11453 }
11454
11455 if (last_bytes)
11456 {
11457 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
11458 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11459 }
11460 }
11461
11462 return 1;
11463 }
11464
11465 /* Select a dominance comparison mode if possible for a test of the general
11466 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
11467 COND_OR == DOM_CC_X_AND_Y => (X && Y)
11468 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
11469 COND_OR == DOM_CC_X_OR_Y => (X || Y)
11470 In all cases OP will be either EQ or NE, but we don't need to know which
11471 here. If we are unable to support a dominance comparison we return
11472 CC mode. This will then fail to match for the RTL expressions that
11473 generate this call. */
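
/* For example (illustrative only), for a test such as "a == 0 || b == 0"
   combine presents an IOR of two EQ comparisons; choosing CC_DEQmode here
   lets the backend evaluate both with a conditional compare sequence along
   the lines of

      cmp   a, #0
      cmpne b, #0

   after which a single EQ test of the flags covers the whole expression.  */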
11474 enum machine_mode
11475 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
11476 {
11477 enum rtx_code cond1, cond2;
11478 int swapped = 0;
11479
11480 /* Currently we will probably get the wrong result if the individual
11481 comparisons are not simple. This also ensures that it is safe to
11482 reverse a comparison if necessary. */
11483 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
11484 != CCmode)
11485 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
11486 != CCmode))
11487 return CCmode;
11488
11489 /* The if_then_else variant of this tests the second condition if the
11490 first passes, but is true if the first fails. Reverse the first
11491 condition to get a true "inclusive-or" expression. */
11492 if (cond_or == DOM_CC_NX_OR_Y)
11493 cond1 = reverse_condition (cond1);
11494
11495 /* If the comparisons are not equal, and one doesn't dominate the other,
11496 then we can't do this. */
11497 if (cond1 != cond2
11498 && !comparison_dominates_p (cond1, cond2)
11499 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
11500 return CCmode;
11501
11502 if (swapped)
11503 {
11504 enum rtx_code temp = cond1;
11505 cond1 = cond2;
11506 cond2 = temp;
11507 }
11508
11509 switch (cond1)
11510 {
11511 case EQ:
11512 if (cond_or == DOM_CC_X_AND_Y)
11513 return CC_DEQmode;
11514
11515 switch (cond2)
11516 {
11517 case EQ: return CC_DEQmode;
11518 case LE: return CC_DLEmode;
11519 case LEU: return CC_DLEUmode;
11520 case GE: return CC_DGEmode;
11521 case GEU: return CC_DGEUmode;
11522 default: gcc_unreachable ();
11523 }
11524
11525 case LT:
11526 if (cond_or == DOM_CC_X_AND_Y)
11527 return CC_DLTmode;
11528
11529 switch (cond2)
11530 {
11531 case LT:
11532 return CC_DLTmode;
11533 case LE:
11534 return CC_DLEmode;
11535 case NE:
11536 return CC_DNEmode;
11537 default:
11538 gcc_unreachable ();
11539 }
11540
11541 case GT:
11542 if (cond_or == DOM_CC_X_AND_Y)
11543 return CC_DGTmode;
11544
11545 switch (cond2)
11546 {
11547 case GT:
11548 return CC_DGTmode;
11549 case GE:
11550 return CC_DGEmode;
11551 case NE:
11552 return CC_DNEmode;
11553 default:
11554 gcc_unreachable ();
11555 }
11556
11557 case LTU:
11558 if (cond_or == DOM_CC_X_AND_Y)
11559 return CC_DLTUmode;
11560
11561 switch (cond2)
11562 {
11563 case LTU:
11564 return CC_DLTUmode;
11565 case LEU:
11566 return CC_DLEUmode;
11567 case NE:
11568 return CC_DNEmode;
11569 default:
11570 gcc_unreachable ();
11571 }
11572
11573 case GTU:
11574 if (cond_or == DOM_CC_X_AND_Y)
11575 return CC_DGTUmode;
11576
11577 switch (cond2)
11578 {
11579 case GTU:
11580 return CC_DGTUmode;
11581 case GEU:
11582 return CC_DGEUmode;
11583 case NE:
11584 return CC_DNEmode;
11585 default:
11586 gcc_unreachable ();
11587 }
11588
11589 /* The remaining cases only occur when both comparisons are the
11590 same. */
11591 case NE:
11592 gcc_assert (cond1 == cond2);
11593 return CC_DNEmode;
11594
11595 case LE:
11596 gcc_assert (cond1 == cond2);
11597 return CC_DLEmode;
11598
11599 case GE:
11600 gcc_assert (cond1 == cond2);
11601 return CC_DGEmode;
11602
11603 case LEU:
11604 gcc_assert (cond1 == cond2);
11605 return CC_DLEUmode;
11606
11607 case GEU:
11608 gcc_assert (cond1 == cond2);
11609 return CC_DGEUmode;
11610
11611 default:
11612 gcc_unreachable ();
11613 }
11614 }
11615
11616 enum machine_mode
11617 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
11618 {
11619 /* All floating point compares return CCFP if it is an equality
11620 comparison, and CCFPE otherwise. */
11621 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11622 {
11623 switch (op)
11624 {
11625 case EQ:
11626 case NE:
11627 case UNORDERED:
11628 case ORDERED:
11629 case UNLT:
11630 case UNLE:
11631 case UNGT:
11632 case UNGE:
11633 case UNEQ:
11634 case LTGT:
11635 return CCFPmode;
11636
11637 case LT:
11638 case LE:
11639 case GT:
11640 case GE:
11641 return CCFPEmode;
11642
11643 default:
11644 gcc_unreachable ();
11645 }
11646 }
11647
11648 /* A compare with a shifted operand. Because of canonicalization, the
11649 comparison will have to be swapped when we emit the assembler. */
11650 if (GET_MODE (y) == SImode
11651 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11652 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11653 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
11654 || GET_CODE (x) == ROTATERT))
11655 return CC_SWPmode;
11656
11657 /* This operation is performed swapped, but since we only rely on the Z
11658 flag we don't need an additional mode. */
11659 if (GET_MODE (y) == SImode
11660 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11661 && GET_CODE (x) == NEG
11662 && (op == EQ || op == NE))
11663 return CC_Zmode;
11664
11665 /* This is a special case that is used by combine to allow a
11666 comparison of a shifted byte load to be split into a zero-extend
11667 followed by a comparison of the shifted integer (only valid for
11668 equalities and unsigned inequalities). */
11669 if (GET_MODE (x) == SImode
11670 && GET_CODE (x) == ASHIFT
11671 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
11672 && GET_CODE (XEXP (x, 0)) == SUBREG
11673 && MEM_P (SUBREG_REG (XEXP (x, 0)))
11674 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
11675 && (op == EQ || op == NE
11676 || op == GEU || op == GTU || op == LTU || op == LEU)
11677 && CONST_INT_P (y))
11678 return CC_Zmode;
11679
11680 /* A construct for a conditional compare: if the false arm contains
11681 0, then both conditions must be true; otherwise either condition
11682 must be true. Not all conditions are possible, so CCmode is
11683 returned if it can't be done. */
11684 if (GET_CODE (x) == IF_THEN_ELSE
11685 && (XEXP (x, 2) == const0_rtx
11686 || XEXP (x, 2) == const1_rtx)
11687 && COMPARISON_P (XEXP (x, 0))
11688 && COMPARISON_P (XEXP (x, 1)))
11689 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11690 INTVAL (XEXP (x, 2)));
11691
11692 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
11693 if (GET_CODE (x) == AND
11694 && (op == EQ || op == NE)
11695 && COMPARISON_P (XEXP (x, 0))
11696 && COMPARISON_P (XEXP (x, 1)))
11697 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11698 DOM_CC_X_AND_Y);
11699
11700 if (GET_CODE (x) == IOR
11701 && (op == EQ || op == NE)
11702 && COMPARISON_P (XEXP (x, 0))
11703 && COMPARISON_P (XEXP (x, 1)))
11704 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11705 DOM_CC_X_OR_Y);
11706
11707 /* An operation (on Thumb) where we want to test for a single bit.
11708 This is done by shifting that bit up into the top bit of a
11709 scratch register; we can then branch on the sign bit. */
11710 if (TARGET_THUMB1
11711 && GET_MODE (x) == SImode
11712 && (op == EQ || op == NE)
11713 && GET_CODE (x) == ZERO_EXTRACT
11714 && XEXP (x, 1) == const1_rtx)
11715 return CC_Nmode;
11716
11717 /* An operation that sets the condition codes as a side-effect; the
11718 V flag is not set correctly, so we can only use comparisons where
11719 this doesn't matter. (For LT and GE we can use "mi" and "pl"
11720 instead.) */
11721 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
11722 if (GET_MODE (x) == SImode
11723 && y == const0_rtx
11724 && (op == EQ || op == NE || op == LT || op == GE)
11725 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
11726 || GET_CODE (x) == AND || GET_CODE (x) == IOR
11727 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
11728 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
11729 || GET_CODE (x) == LSHIFTRT
11730 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11731 || GET_CODE (x) == ROTATERT
11732 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
11733 return CC_NOOVmode;
11734
11735 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
11736 return CC_Zmode;
11737
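/* An unsigned comparison of a sum against one of its operands only needs
   the carry flag: e.g. the overflow check "a + b < a" (LTU) is true iff
   the addition carried out, so CC_Cmode is sufficient.  */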
11738 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
11739 && GET_CODE (x) == PLUS
11740 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
11741 return CC_Cmode;
11742
11743 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
11744 {
11745 switch (op)
11746 {
11747 case EQ:
11748 case NE:
11749 /* A DImode comparison against zero can be implemented by
11750 or'ing the two halves together. */
11751 if (y == const0_rtx)
11752 return CC_Zmode;
11753
11754 /* We can do an equality test in three Thumb instructions. */
11755 if (!TARGET_32BIT)
11756 return CC_Zmode;
11757
11758 /* FALLTHROUGH */
11759
11760 case LTU:
11761 case LEU:
11762 case GTU:
11763 case GEU:
11764 /* DImode unsigned comparisons can be implemented by cmp +
11765 cmpeq without a scratch register. Not worth doing in
11766 Thumb-2. */
11767 if (TARGET_32BIT)
11768 return CC_CZmode;
11769
11770 /* FALLTHROUGH */
11771
11772 case LT:
11773 case LE:
11774 case GT:
11775 case GE:
11776 /* DImode signed and unsigned comparisons can be implemented
11777 by cmp + sbcs with a scratch register, but that does not
11778 set the Z flag - we must reverse GT/LE/GTU/LEU. */
11779 gcc_assert (op != EQ && op != NE);
11780 return CC_NCVmode;
11781
11782 default:
11783 gcc_unreachable ();
11784 }
11785 }
11786
11787 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
11788 return GET_MODE (x);
11789
11790 return CCmode;
11791 }
11792
11793 /* X and Y are two things to compare using CODE. Emit the compare insn and
11794 return the rtx for the CC register in the proper mode. SCRATCH, if non-null,
11795 is a scratch register that may be needed for DImode comparisons after reload. */
11796 rtx
11797 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
11798 {
11799 enum machine_mode mode;
11800 rtx cc_reg;
11801 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
11802
11803 /* We might have X as a constant, Y as a register because of the predicates
11804 used for cmpdi. If so, force X to a register here. */
11805 if (dimode_comparison && !REG_P (x))
11806 x = force_reg (DImode, x);
11807
11808 mode = SELECT_CC_MODE (code, x, y);
11809 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
11810
11811 if (dimode_comparison
11812 && mode != CC_CZmode)
11813 {
11814 rtx clobber, set;
11815
11816 /* To compare two non-zero values for equality, XOR them and
11817 then compare against zero. Not used for ARM mode; there
11818 CC_CZmode is cheaper. */
11819 if (mode == CC_Zmode && y != const0_rtx)
11820 {
11821 gcc_assert (!reload_completed);
11822 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
11823 y = const0_rtx;
11824 }
11825
11826 /* A scratch register is required. */
11827 if (reload_completed)
11828 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
11829 else
11830 scratch = gen_rtx_SCRATCH (SImode);
11831
11832 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
11833 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
11834 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11835 }
11836 else
11837 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
11838
11839 return cc_reg;
11840 }
11841
11842 /* Generate a sequence of insns that will generate the correct return
11843 address mask depending on the physical architecture that the program
11844 is running on. */
11845 rtx
11846 arm_gen_return_addr_mask (void)
11847 {
11848 rtx reg = gen_reg_rtx (Pmode);
11849
11850 emit_insn (gen_return_addr_mask (reg));
11851 return reg;
11852 }
11853
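/* Handle loading a half-word from memory during reload by synthesizing it
   as two byte loads which are then shifted and OR'd together.  OPERANDS[0]
   is the destination, OPERANDS[1] the half-word source (a MEM, or a pseudo
   that has been spilled to the stack), and OPERANDS[2] a DImode scratch
   whose two halves give us a register that cannot overlap the
   destination.  */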
11854 void
11855 arm_reload_in_hi (rtx *operands)
11856 {
11857 rtx ref = operands[1];
11858 rtx base, scratch;
11859 HOST_WIDE_INT offset = 0;
11860
11861 if (GET_CODE (ref) == SUBREG)
11862 {
11863 offset = SUBREG_BYTE (ref);
11864 ref = SUBREG_REG (ref);
11865 }
11866
11867 if (REG_P (ref))
11868 {
11869 /* We have a pseudo which has been spilt onto the stack; there
11870 are two cases here: the first where there is a simple
11871 stack-slot replacement and a second where the stack-slot is
11872 out of range, or is used as a subreg. */
11873 if (reg_equiv_mem (REGNO (ref)))
11874 {
11875 ref = reg_equiv_mem (REGNO (ref));
11876 base = find_replacement (&XEXP (ref, 0));
11877 }
11878 else
11879 /* The slot is out of range, or was dressed up in a SUBREG. */
11880 base = reg_equiv_address (REGNO (ref));
11881 }
11882 else
11883 base = find_replacement (&XEXP (ref, 0));
11884
11885 /* Handle the case where the address is too complex to be offset by 1. */
11886 if (GET_CODE (base) == MINUS
11887 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
11888 {
11889 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11890
11891 emit_set_insn (base_plus, base);
11892 base = base_plus;
11893 }
11894 else if (GET_CODE (base) == PLUS)
11895 {
11896 /* The addend must be CONST_INT, or we would have dealt with it above. */
11897 HOST_WIDE_INT hi, lo;
11898
11899 offset += INTVAL (XEXP (base, 1));
11900 base = XEXP (base, 0);
11901
11902 /* Rework the address into a legal sequence of insns. */
11903 /* Valid range for lo is -4095 -> 4095 */
11904 lo = (offset >= 0
11905 ? (offset & 0xfff)
11906 : -((-offset) & 0xfff));
11907
11908 /* Corner case: if lo is the max offset then we would be out of range
11909 once we have added the additional 1 below, so bump the msb into the
11910 pre-loading insn(s). */
11911 if (lo == 4095)
11912 lo &= 0x7ff;
11913
11914 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11915 ^ (HOST_WIDE_INT) 0x80000000)
11916 - (HOST_WIDE_INT) 0x80000000);
11917
11918 gcc_assert (hi + lo == offset);
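
/* Worked example: an offset of 0x1FFF would give lo == 4095, which the
   adjustment above reduces to 0x7FF, leaving hi == 0x1800.  After adding
   hi to the base register, the two byte accesses below use offsets 0x7FF
   and 0x800, both within the legal +/-4095 range.  */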
11919
11920 if (hi != 0)
11921 {
11922 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11923
11924 /* Get the base address; addsi3 knows how to handle constants
11925 that require more than one insn. */
11926 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11927 base = base_plus;
11928 offset = lo;
11929 }
11930 }
11931
11932 /* Operands[2] may overlap operands[0] (though it won't overlap
11933 operands[1]); that's why we asked for a DImode reg -- so we can
11934 use the half that does not overlap. */
11935 if (REGNO (operands[2]) == REGNO (operands[0]))
11936 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11937 else
11938 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11939
11940 emit_insn (gen_zero_extendqisi2 (scratch,
11941 gen_rtx_MEM (QImode,
11942 plus_constant (Pmode, base,
11943 offset))));
11944 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
11945 gen_rtx_MEM (QImode,
11946 plus_constant (Pmode, base,
11947 offset + 1))));
11948 if (!BYTES_BIG_ENDIAN)
11949 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11950 gen_rtx_IOR (SImode,
11951 gen_rtx_ASHIFT
11952 (SImode,
11953 gen_rtx_SUBREG (SImode, operands[0], 0),
11954 GEN_INT (8)),
11955 scratch));
11956 else
11957 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11958 gen_rtx_IOR (SImode,
11959 gen_rtx_ASHIFT (SImode, scratch,
11960 GEN_INT (8)),
11961 gen_rtx_SUBREG (SImode, operands[0], 0)));
11962 }
11963
11964 /* Handle storing a half-word to memory during reload by synthesizing as two
11965 byte stores. Take care not to clobber the input values until after we
11966 have moved them somewhere safe. This code assumes that if the DImode
11967 scratch in operands[2] overlaps either the input value or output address
11968 in some way, then that value must die in this insn (we absolutely need
11969 two scratch registers for some corner cases). */
11970 void
11971 arm_reload_out_hi (rtx *operands)
11972 {
11973 rtx ref = operands[0];
11974 rtx outval = operands[1];
11975 rtx base, scratch;
11976 HOST_WIDE_INT offset = 0;
11977
11978 if (GET_CODE (ref) == SUBREG)
11979 {
11980 offset = SUBREG_BYTE (ref);
11981 ref = SUBREG_REG (ref);
11982 }
11983
11984 if (REG_P (ref))
11985 {
11986 /* We have a pseudo which has been spilt onto the stack; there
11987 are two cases here: the first where there is a simple
11988 stack-slot replacement and a second where the stack-slot is
11989 out of range, or is used as a subreg. */
11990 if (reg_equiv_mem (REGNO (ref)))
11991 {
11992 ref = reg_equiv_mem (REGNO (ref));
11993 base = find_replacement (&XEXP (ref, 0));
11994 }
11995 else
11996 /* The slot is out of range, or was dressed up in a SUBREG. */
11997 base = reg_equiv_address (REGNO (ref));
11998 }
11999 else
12000 base = find_replacement (&XEXP (ref, 0));
12001
12002 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
12003
12004 /* Handle the case where the address is too complex to be offset by 1. */
12005 if (GET_CODE (base) == MINUS
12006 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
12007 {
12008 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12009
12010 /* Be careful not to destroy OUTVAL. */
12011 if (reg_overlap_mentioned_p (base_plus, outval))
12012 {
12013 /* Updating base_plus might destroy outval; see if we can
12014 swap the scratch and base_plus. */
12015 if (!reg_overlap_mentioned_p (scratch, outval))
12016 {
12017 rtx tmp = scratch;
12018 scratch = base_plus;
12019 base_plus = tmp;
12020 }
12021 else
12022 {
12023 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12024
12025 /* Be conservative and copy OUTVAL into the scratch now;
12026 this should only be necessary if OUTVAL is a subreg
12027 of something larger than a word. */
12028 /* XXX Might this clobber base? I can't see how it can,
12029 since scratch is known to overlap with OUTVAL, and
12030 must be wider than a word. */
12031 emit_insn (gen_movhi (scratch_hi, outval));
12032 outval = scratch_hi;
12033 }
12034 }
12035
12036 emit_set_insn (base_plus, base);
12037 base = base_plus;
12038 }
12039 else if (GET_CODE (base) == PLUS)
12040 {
12041 /* The addend must be CONST_INT, or we would have dealt with it above. */
12042 HOST_WIDE_INT hi, lo;
12043
12044 offset += INTVAL (XEXP (base, 1));
12045 base = XEXP (base, 0);
12046
12047 /* Rework the address into a legal sequence of insns. */
12048 /* Valid range for lo is -4095 -> 4095 */
12049 lo = (offset >= 0
12050 ? (offset & 0xfff)
12051 : -((-offset) & 0xfff));
12052
12053 /* Corner case: if lo is the max offset then we would be out of range
12054 once we have added the additional 1 below, so bump the msb into the
12055 pre-loading insn(s). */
12056 if (lo == 4095)
12057 lo &= 0x7ff;
12058
12059 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
12060 ^ (HOST_WIDE_INT) 0x80000000)
12061 - (HOST_WIDE_INT) 0x80000000);
12062
12063 gcc_assert (hi + lo == offset);
12064
12065 if (hi != 0)
12066 {
12067 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12068
12069 /* Be careful not to destroy OUTVAL. */
12070 if (reg_overlap_mentioned_p (base_plus, outval))
12071 {
12072 /* Updating base_plus might destroy outval; see if we
12073 can swap the scratch and base_plus. */
12074 if (!reg_overlap_mentioned_p (scratch, outval))
12075 {
12076 rtx tmp = scratch;
12077 scratch = base_plus;
12078 base_plus = tmp;
12079 }
12080 else
12081 {
12082 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12083
12084 /* Be conservative and copy OUTVAL into the scratch now;
12085 this should only be necessary if OUTVAL is a
12086 subreg of something larger than a word. */
12087 /* XXX Might this clobber base? I can't see how it
12088 can, since scratch is known to overlap with
12089 outval. */
12090 emit_insn (gen_movhi (scratch_hi, outval));
12091 outval = scratch_hi;
12092 }
12093 }
12094
12095 /* Get the base address; addsi3 knows how to handle constants
12096 that require more than one insn. */
12097 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
12098 base = base_plus;
12099 offset = lo;
12100 }
12101 }
12102
12103 if (BYTES_BIG_ENDIAN)
12104 {
12105 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12106 plus_constant (Pmode, base,
12107 offset + 1)),
12108 gen_lowpart (QImode, outval)));
12109 emit_insn (gen_lshrsi3 (scratch,
12110 gen_rtx_SUBREG (SImode, outval, 0),
12111 GEN_INT (8)));
12112 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12113 offset)),
12114 gen_lowpart (QImode, scratch)));
12115 }
12116 else
12117 {
12118 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12119 offset)),
12120 gen_lowpart (QImode, outval)));
12121 emit_insn (gen_lshrsi3 (scratch,
12122 gen_rtx_SUBREG (SImode, outval, 0),
12123 GEN_INT (8)));
12124 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12125 plus_constant (Pmode, base,
12126 offset + 1)),
12127 gen_lowpart (QImode, scratch)));
12128 }
12129 }
12130
12131 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
12132 (padded to the size of a word) should be passed in a register. */
12133
12134 static bool
12135 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
12136 {
12137 if (TARGET_AAPCS_BASED)
12138 return must_pass_in_stack_var_size (mode, type);
12139 else
12140 return must_pass_in_stack_var_size_or_pad (mode, type);
12141 }
12142
12143
12144 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
12145 Return true if an argument passed on the stack should be padded upwards,
12146 i.e. if the least-significant byte has useful data.
12147 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
12148 aggregate types are placed in the lowest memory address. */
12149
12150 bool
12151 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
12152 {
12153 if (!TARGET_AAPCS_BASED)
12154 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
12155
12156 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
12157 return false;
12158
12159 return true;
12160 }
12161
12162
12163 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
12164 Return !BYTES_BIG_ENDIAN if the least significant byte of the
12165 register has useful data, and return the opposite if the most
12166 significant byte does. */
12167
12168 bool
12169 arm_pad_reg_upward (enum machine_mode mode,
12170 tree type, int first ATTRIBUTE_UNUSED)
12171 {
12172 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
12173 {
12174 /* For AAPCS, small aggregates, small fixed-point types,
12175 and small complex types are always padded upwards. */
12176 if (type)
12177 {
12178 if ((AGGREGATE_TYPE_P (type)
12179 || TREE_CODE (type) == COMPLEX_TYPE
12180 || FIXED_POINT_TYPE_P (type))
12181 && int_size_in_bytes (type) <= 4)
12182 return true;
12183 }
12184 else
12185 {
12186 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
12187 && GET_MODE_SIZE (mode) <= 4)
12188 return true;
12189 }
12190 }
12191
12192 /* Otherwise, use default padding. */
12193 return !BYTES_BIG_ENDIAN;
12194 }
12195
12196 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
12197 assuming that the address in the base register is word aligned. */
12198 bool
12199 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
12200 {
12201 HOST_WIDE_INT max_offset;
12202
12203 /* Offset must be a multiple of 4 in Thumb mode. */
12204 if (TARGET_THUMB2 && ((offset & 3) != 0))
12205 return false;
12206
12207 if (TARGET_THUMB2)
12208 max_offset = 1020;
12209 else if (TARGET_ARM)
12210 max_offset = 255;
12211 else
12212 gcc_unreachable ();
12213
12214 return ((offset <= max_offset) && (offset >= -max_offset));
12215 }
12216
12217 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
12218 Assumes that RT, RT2, and RN are REG; this is guaranteed by the patterns.
12219 Assumes that the address in the base register RN is word aligned. The
12220 patterns guarantee that both memory accesses use the same base register,
12221 that the offsets are constants within range, and that the gap between them is 4.
12222 If reload is complete, also check that the registers are legal. WBACK indicates
12223 whether the address is updated. LOAD indicates whether the access is a load or a store. */
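
/* For instance (illustrative only), in ARM state "ldrd r0, r1, [r2]" is
   acceptable because the first destination register is even and the second
   is its successor, whereas "ldrd r1, r2, [r3]" is rejected below by the
   t % 2 check.  Thumb-2 has no even/odd restriction but rejects SP or PC
   as a destination.  */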
12224 bool
12225 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
12226 bool wback, bool load)
12227 {
12228 unsigned int t, t2, n;
12229
12230 if (!reload_completed)
12231 return true;
12232
12233 if (!offset_ok_for_ldrd_strd (offset))
12234 return false;
12235
12236 t = REGNO (rt);
12237 t2 = REGNO (rt2);
12238 n = REGNO (rn);
12239
12240 if ((TARGET_THUMB2)
12241 && ((wback && (n == t || n == t2))
12242 || (t == SP_REGNUM)
12243 || (t == PC_REGNUM)
12244 || (t2 == SP_REGNUM)
12245 || (t2 == PC_REGNUM)
12246 || (!load && (n == PC_REGNUM))
12247 || (load && (t == t2))
12248 /* Triggers Cortex-M3 LDRD errata. */
12249 || (!wback && load && fix_cm3_ldrd && (n == t))))
12250 return false;
12251
12252 if ((TARGET_ARM)
12253 && ((wback && (n == t || n == t2))
12254 || (t2 == PC_REGNUM)
12255 || (t % 2 != 0) /* First destination register is not even. */
12256 || (t2 != t + 1)
12257 /* PC can be used as base register (for offset addressing only),
12258 but it is deprecated. */
12259 || (n == PC_REGNUM)))
12260 return false;
12261
12262 return true;
12263 }
12264
12265 \f
12266 /* Print a symbolic form of X to the debug file, F. */
12267 static void
12268 arm_print_value (FILE *f, rtx x)
12269 {
12270 switch (GET_CODE (x))
12271 {
12272 case CONST_INT:
12273 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
12274 return;
12275
12276 case CONST_DOUBLE:
12277 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
12278 return;
12279
12280 case CONST_VECTOR:
12281 {
12282 int i;
12283
12284 fprintf (f, "<");
12285 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
12286 {
12287 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
12288 if (i < (CONST_VECTOR_NUNITS (x) - 1))
12289 fputc (',', f);
12290 }
12291 fprintf (f, ">");
12292 }
12293 return;
12294
12295 case CONST_STRING:
12296 fprintf (f, "\"%s\"", XSTR (x, 0));
12297 return;
12298
12299 case SYMBOL_REF:
12300 fprintf (f, "`%s'", XSTR (x, 0));
12301 return;
12302
12303 case LABEL_REF:
12304 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
12305 return;
12306
12307 case CONST:
12308 arm_print_value (f, XEXP (x, 0));
12309 return;
12310
12311 case PLUS:
12312 arm_print_value (f, XEXP (x, 0));
12313 fprintf (f, "+");
12314 arm_print_value (f, XEXP (x, 1));
12315 return;
12316
12317 case PC:
12318 fprintf (f, "pc");
12319 return;
12320
12321 default:
12322 fprintf (f, "????");
12323 return;
12324 }
12325 }
12326 \f
12327 /* Routines for manipulation of the constant pool. */
12328
12329 /* Arm instructions cannot load a large constant directly into a
12330 register; they have to come from a pc relative load. The constant
12331 must therefore be placed in the addressable range of the pc
12332 relative load. Depending on the precise pc relative load
12333 instruction the range is somewhere between 256 bytes and 4k. This
12334 means that we often have to dump a constant inside a function, and
12335 generate code to branch around it.
12336
12337 It is important to minimize this, since the branches will slow
12338 things down and make the code larger.
12339
12340 Normally we can hide the table after an existing unconditional
12341 branch so that there is no interruption of the flow, but in the
12342 worst case the code looks like this:
12343
12344 ldr rn, L1
12345 ...
12346 b L2
12347 align
12348 L1: .long value
12349 L2:
12350 ...
12351
12352 ldr rn, L3
12353 ...
12354 b L4
12355 align
12356 L3: .long value
12357 L4:
12358 ...
12359
12360 We fix this by performing a scan after scheduling, which notices
12361 which instructions need to have their operands fetched from the
12362 constant table and builds the table.
12363
12364 The algorithm starts by building a table of all the constants that
12365 need fixing up and all the natural barriers in the function (places
12366 where a constant table can be dropped without breaking the flow).
12367 For each fixup we note how far the pc-relative replacement will be
12368 able to reach and the offset of the instruction into the function.
12369
12370 Having built the table we then group the fixes together to form
12371 tables that are as large as possible (subject to addressing
12372 constraints) and emit each table of constants after the last
12373 barrier that is within range of all the instructions in the group.
12374 If a group does not contain a barrier, then we forcibly create one
12375 by inserting a jump instruction into the flow. Once the table has
12376 been inserted, the insns are then modified to reference the
12377 relevant entry in the pool.
12378
12379 Possible enhancements to the algorithm (not implemented) are:
12380
12381 1) For some processors and object formats, there may be benefit in
12382 aligning the pools to the start of cache lines; this alignment
12383 would need to be taken into account when calculating addressability
12384 of a pool. */
12385
12386 /* These typedefs are located at the start of this file, so that
12387 they can be used in the prototypes there. This comment is to
12388 remind readers of that fact so that the following structures
12389 can be understood more easily.
12390
12391 typedef struct minipool_node Mnode;
12392 typedef struct minipool_fixup Mfix; */
12393
12394 struct minipool_node
12395 {
12396 /* Doubly linked chain of entries. */
12397 Mnode * next;
12398 Mnode * prev;
12399 /* The maximum offset into the code at which this entry can be placed. While
12400 pushing fixes for forward references, all entries are sorted in order
12401 of increasing max_address. */
12402 HOST_WIDE_INT max_address;
12403 /* Similarly for an entry inserted for a backwards ref. */
12404 HOST_WIDE_INT min_address;
12405 /* The number of fixes referencing this entry. This can become zero
12406 if we "unpush" an entry. In this case we ignore the entry when we
12407 come to emit the code. */
12408 int refcount;
12409 /* The offset from the start of the minipool. */
12410 HOST_WIDE_INT offset;
12411 /* The value in the table. */
12412 rtx value;
12413 /* The mode of value. */
12414 enum machine_mode mode;
12415 /* The size of the value. With iWMMXt enabled
12416 sizes > 4 also imply an alignment of 8 bytes. */
12417 int fix_size;
12418 };
12419
12420 struct minipool_fixup
12421 {
12422 Mfix * next;
12423 rtx insn;
12424 HOST_WIDE_INT address;
12425 rtx * loc;
12426 enum machine_mode mode;
12427 int fix_size;
12428 rtx value;
12429 Mnode * minipool;
12430 HOST_WIDE_INT forwards;
12431 HOST_WIDE_INT backwards;
12432 };
12433
12434 /* Fixes less than a word need padding out to a word boundary. */
12435 #define MINIPOOL_FIX_SIZE(mode) \
12436 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
12437
12438 static Mnode * minipool_vector_head;
12439 static Mnode * minipool_vector_tail;
12440 static rtx minipool_vector_label;
12441 static int minipool_pad;
12442
12443 /* The linked list of all minipool fixes required for this function. */
12444 Mfix * minipool_fix_head;
12445 Mfix * minipool_fix_tail;
12446 /* The fix entry for the current minipool, once it has been placed. */
12447 Mfix * minipool_barrier;
12448
12449 /* Determines if INSN is the start of a jump table. Returns the end
12450 of the TABLE or NULL_RTX. */
12451 static rtx
12452 is_jump_table (rtx insn)
12453 {
12454 rtx table;
12455
12456 if (jump_to_label_p (insn)
12457 && ((table = next_real_insn (JUMP_LABEL (insn)))
12458 == next_real_insn (insn))
12459 && table != NULL
12460 && JUMP_P (table)
12461 && (GET_CODE (PATTERN (table)) == ADDR_VEC
12462 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
12463 return table;
12464
12465 return NULL_RTX;
12466 }
12467
12468 #ifndef JUMP_TABLES_IN_TEXT_SECTION
12469 #define JUMP_TABLES_IN_TEXT_SECTION 0
12470 #endif
12471
12472 static HOST_WIDE_INT
12473 get_jump_table_size (rtx insn)
12474 {
12475 /* ADDR_VECs only take room if read-only data goes into the text
12476 section. */
12477 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
12478 {
12479 rtx body = PATTERN (insn);
12480 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
12481 HOST_WIDE_INT size;
12482 HOST_WIDE_INT modesize;
12483
12484 modesize = GET_MODE_SIZE (GET_MODE (body));
12485 size = modesize * XVECLEN (body, elt);
12486 switch (modesize)
12487 {
12488 case 1:
12489 /* Round up size of TBB table to a halfword boundary. */
12490 size = (size + 1) & ~(HOST_WIDE_INT)1;
12491 break;
12492 case 2:
12493 /* No padding necessary for TBH. */
12494 break;
12495 case 4:
12496 /* Add two bytes for alignment on Thumb. */
12497 if (TARGET_THUMB)
12498 size += 2;
12499 break;
12500 default:
12501 gcc_unreachable ();
12502 }
12503 return size;
12504 }
12505
12506 return 0;
12507 }
12508
12509 /* Return the maximum amount of padding that will be inserted before
12510 label LABEL. */
12511
12512 static HOST_WIDE_INT
12513 get_label_padding (rtx label)
12514 {
12515 HOST_WIDE_INT align, min_insn_size;
12516
12517 align = 1 << label_to_alignment (label);
12518 min_insn_size = TARGET_THUMB ? 2 : 4;
12519 return align > min_insn_size ? align - min_insn_size : 0;
12520 }
12521
12522 /* Move a minipool fix MP from its current location to before MAX_MP.
12523 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
12524 constraints may need updating. */
12525 static Mnode *
12526 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
12527 HOST_WIDE_INT max_address)
12528 {
12529 /* The code below assumes these are different. */
12530 gcc_assert (mp != max_mp);
12531
12532 if (max_mp == NULL)
12533 {
12534 if (max_address < mp->max_address)
12535 mp->max_address = max_address;
12536 }
12537 else
12538 {
12539 if (max_address > max_mp->max_address - mp->fix_size)
12540 mp->max_address = max_mp->max_address - mp->fix_size;
12541 else
12542 mp->max_address = max_address;
12543
12544 /* Unlink MP from its current position. Since max_mp is non-null,
12545 mp->prev must be non-null. */
12546 mp->prev->next = mp->next;
12547 if (mp->next != NULL)
12548 mp->next->prev = mp->prev;
12549 else
12550 minipool_vector_tail = mp->prev;
12551
12552 /* Re-insert it before MAX_MP. */
12553 mp->next = max_mp;
12554 mp->prev = max_mp->prev;
12555 max_mp->prev = mp;
12556
12557 if (mp->prev != NULL)
12558 mp->prev->next = mp;
12559 else
12560 minipool_vector_head = mp;
12561 }
12562
12563 /* Save the new entry. */
12564 max_mp = mp;
12565
12566 /* Scan over the preceding entries and adjust their addresses as
12567 required. */
12568 while (mp->prev != NULL
12569 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12570 {
12571 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12572 mp = mp->prev;
12573 }
12574
12575 return max_mp;
12576 }
12577
12578 /* Add a constant to the minipool for a forward reference. Returns the
12579 node added or NULL if the constant will not fit in this pool. */
12580 static Mnode *
12581 add_minipool_forward_ref (Mfix *fix)
12582 {
12583 /* If set, max_mp is the first pool_entry that has a lower
12584 constraint than the one we are trying to add. */
12585 Mnode * max_mp = NULL;
12586 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
12587 Mnode * mp;
12588
12589 /* If the minipool starts before the end of FIX->INSN then this FIX
12590 cannot be placed into the current pool. Furthermore, adding the
12591 new constant pool entry may cause the pool to start FIX_SIZE bytes
12592 earlier. */
12593 if (minipool_vector_head &&
12594 (fix->address + get_attr_length (fix->insn)
12595 >= minipool_vector_head->max_address - fix->fix_size))
12596 return NULL;
12597
12598 /* Scan the pool to see if a constant with the same value has
12599 already been added. While we are doing this, also note the
12600 location where we must insert the constant if it doesn't already
12601 exist. */
12602 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12603 {
12604 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12605 && fix->mode == mp->mode
12606 && (!LABEL_P (fix->value)
12607 || (CODE_LABEL_NUMBER (fix->value)
12608 == CODE_LABEL_NUMBER (mp->value)))
12609 && rtx_equal_p (fix->value, mp->value))
12610 {
12611 /* More than one fix references this entry. */
12612 mp->refcount++;
12613 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
12614 }
12615
12616 /* Note the insertion point if necessary. */
12617 if (max_mp == NULL
12618 && mp->max_address > max_address)
12619 max_mp = mp;
12620
12621 /* If we are inserting an 8-byte aligned quantity and
12622 we have not already found an insertion point, then
12623 make sure that all such 8-byte aligned quantities are
12624 placed at the start of the pool. */
12625 if (ARM_DOUBLEWORD_ALIGN
12626 && max_mp == NULL
12627 && fix->fix_size >= 8
12628 && mp->fix_size < 8)
12629 {
12630 max_mp = mp;
12631 max_address = mp->max_address;
12632 }
12633 }
12634
12635 /* The value is not currently in the minipool, so we need to create
12636 a new entry for it. If MAX_MP is NULL, the entry will be put on
12637 the end of the list since the placement is less constrained than
12638 any existing entry. Otherwise, we insert the new fix before
12639 MAX_MP and, if necessary, adjust the constraints on the other
12640 entries. */
12641 mp = XNEW (Mnode);
12642 mp->fix_size = fix->fix_size;
12643 mp->mode = fix->mode;
12644 mp->value = fix->value;
12645 mp->refcount = 1;
12646 /* Not yet required for a backwards ref. */
12647 mp->min_address = -65536;
12648
12649 if (max_mp == NULL)
12650 {
12651 mp->max_address = max_address;
12652 mp->next = NULL;
12653 mp->prev = minipool_vector_tail;
12654
12655 if (mp->prev == NULL)
12656 {
12657 minipool_vector_head = mp;
12658 minipool_vector_label = gen_label_rtx ();
12659 }
12660 else
12661 mp->prev->next = mp;
12662
12663 minipool_vector_tail = mp;
12664 }
12665 else
12666 {
12667 if (max_address > max_mp->max_address - mp->fix_size)
12668 mp->max_address = max_mp->max_address - mp->fix_size;
12669 else
12670 mp->max_address = max_address;
12671
12672 mp->next = max_mp;
12673 mp->prev = max_mp->prev;
12674 max_mp->prev = mp;
12675 if (mp->prev != NULL)
12676 mp->prev->next = mp;
12677 else
12678 minipool_vector_head = mp;
12679 }
12680
12681 /* Save the new entry. */
12682 max_mp = mp;
12683
12684 /* Scan over the preceding entries and adjust their addresses as
12685 required. */
12686 while (mp->prev != NULL
12687 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12688 {
12689 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12690 mp = mp->prev;
12691 }
12692
12693 return max_mp;
12694 }
12695
12696 static Mnode *
12697 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
12698 HOST_WIDE_INT min_address)
12699 {
12700 HOST_WIDE_INT offset;
12701
12702 /* The code below assumes these are different. */
12703 gcc_assert (mp != min_mp);
12704
12705 if (min_mp == NULL)
12706 {
12707 if (min_address > mp->min_address)
12708 mp->min_address = min_address;
12709 }
12710 else
12711 {
12712 /* We will adjust this below if it is too loose. */
12713 mp->min_address = min_address;
12714
12715 /* Unlink MP from its current position. Since min_mp is non-null,
12716 mp->next must be non-null. */
12717 mp->next->prev = mp->prev;
12718 if (mp->prev != NULL)
12719 mp->prev->next = mp->next;
12720 else
12721 minipool_vector_head = mp->next;
12722
12723 /* Reinsert it after MIN_MP. */
12724 mp->prev = min_mp;
12725 mp->next = min_mp->next;
12726 min_mp->next = mp;
12727 if (mp->next != NULL)
12728 mp->next->prev = mp;
12729 else
12730 minipool_vector_tail = mp;
12731 }
12732
12733 min_mp = mp;
12734
12735 offset = 0;
12736 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12737 {
12738 mp->offset = offset;
12739 if (mp->refcount > 0)
12740 offset += mp->fix_size;
12741
12742 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
12743 mp->next->min_address = mp->min_address + mp->fix_size;
12744 }
12745
12746 return min_mp;
12747 }
12748
12749 /* Add a constant to the minipool for a backward reference. Returns the
12750 node added or NULL if the constant will not fit in this pool.
12751
12752 Note that the code for insertion for a backwards reference can be
12753 somewhat confusing because the calculated offsets for each fix do
12754 not take into account the size of the pool (which is still under
12755 construction). */
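/* A purely illustrative example of the constraint computed below: if
   FIX->address is 5000 and FIX->backwards is 4096 (the backward pool
   range of the insn being fixed up), then the new entry must not be
   placed before

     min_address = 5000 - 4096 = 904;

   in particular the barrier after which this pool will be dumped must
   itself lie beyond MIN_ADDRESS, or the fix is rejected.  */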
12756 static Mnode *
12757 add_minipool_backward_ref (Mfix *fix)
12758 {
12759 /* If set, min_mp is the last pool_entry that has a lower constraint
12760 than the one we are trying to add. */
12761 Mnode *min_mp = NULL;
12762 /* This can be negative, since it is only a constraint. */
12763 HOST_WIDE_INT min_address = fix->address - fix->backwards;
12764 Mnode *mp;
12765
12766 /* If we can't reach the current pool from this insn, or if we can't
12767 insert this entry at the end of the pool without pushing other
12768 fixes out of range, then we don't try. This ensures that we
12769 can't fail later on. */
12770 if (min_address >= minipool_barrier->address
12771 || (minipool_vector_tail->min_address + fix->fix_size
12772 >= minipool_barrier->address))
12773 return NULL;
12774
12775 /* Scan the pool to see if a constant with the same value has
12776 already been added. While we are doing this, also note the
12777 location where we must insert the constant if it doesn't already
12778 exist. */
12779 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
12780 {
12781 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12782 && fix->mode == mp->mode
12783 && (!LABEL_P (fix->value)
12784 || (CODE_LABEL_NUMBER (fix->value)
12785 == CODE_LABEL_NUMBER (mp->value)))
12786 && rtx_equal_p (fix->value, mp->value)
12787 /* Check that there is enough slack to move this entry to the
12788 end of the table (this is conservative). */
12789 && (mp->max_address
12790 > (minipool_barrier->address
12791 + minipool_vector_tail->offset
12792 + minipool_vector_tail->fix_size)))
12793 {
12794 mp->refcount++;
12795 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
12796 }
12797
12798 if (min_mp != NULL)
12799 mp->min_address += fix->fix_size;
12800 else
12801 {
12802 /* Note the insertion point if necessary. */
12803 if (mp->min_address < min_address)
12804 {
12805 /* For now, we do not allow the insertion of 8-byte alignment
12806 requiring nodes anywhere but at the start of the pool. */
12807 if (ARM_DOUBLEWORD_ALIGN
12808 && fix->fix_size >= 8 && mp->fix_size < 8)
12809 return NULL;
12810 else
12811 min_mp = mp;
12812 }
12813 else if (mp->max_address
12814 < minipool_barrier->address + mp->offset + fix->fix_size)
12815 {
12816 /* Inserting before this entry would push the fix beyond
12817 its maximum address (which can happen if we have
12818 re-located a forwards fix); force the new fix to come
12819 after it. */
12820 if (ARM_DOUBLEWORD_ALIGN
12821 && fix->fix_size >= 8 && mp->fix_size < 8)
12822 return NULL;
12823 else
12824 {
12825 min_mp = mp;
12826 min_address = mp->min_address + fix->fix_size;
12827 }
12828 }
12829 /* Do not insert a non-8-byte aligned quantity before 8-byte
12830 aligned quantities. */
12831 else if (ARM_DOUBLEWORD_ALIGN
12832 && fix->fix_size < 8
12833 && mp->fix_size >= 8)
12834 {
12835 min_mp = mp;
12836 min_address = mp->min_address + fix->fix_size;
12837 }
12838 }
12839 }
12840
12841 /* We need to create a new entry. */
12842 mp = XNEW (Mnode);
12843 mp->fix_size = fix->fix_size;
12844 mp->mode = fix->mode;
12845 mp->value = fix->value;
12846 mp->refcount = 1;
12847 mp->max_address = minipool_barrier->address + 65536;
12848
12849 mp->min_address = min_address;
12850
12851 if (min_mp == NULL)
12852 {
12853 mp->prev = NULL;
12854 mp->next = minipool_vector_head;
12855
12856 if (mp->next == NULL)
12857 {
12858 minipool_vector_tail = mp;
12859 minipool_vector_label = gen_label_rtx ();
12860 }
12861 else
12862 mp->next->prev = mp;
12863
12864 minipool_vector_head = mp;
12865 }
12866 else
12867 {
12868 mp->next = min_mp->next;
12869 mp->prev = min_mp;
12870 min_mp->next = mp;
12871
12872 if (mp->next != NULL)
12873 mp->next->prev = mp;
12874 else
12875 minipool_vector_tail = mp;
12876 }
12877
12878 /* Save the new entry. */
12879 min_mp = mp;
12880
12881 if (mp->prev)
12882 mp = mp->prev;
12883 else
12884 mp->offset = 0;
12885
12886 /* Scan over the following entries and adjust their offsets. */
12887 while (mp->next != NULL)
12888 {
12889 if (mp->next->min_address < mp->min_address + mp->fix_size)
12890 mp->next->min_address = mp->min_address + mp->fix_size;
12891
12892 if (mp->refcount)
12893 mp->next->offset = mp->offset + mp->fix_size;
12894 else
12895 mp->next->offset = mp->offset;
12896
12897 mp = mp->next;
12898 }
12899
12900 return min_mp;
12901 }
12902
12903 static void
12904 assign_minipool_offsets (Mfix *barrier)
12905 {
12906 HOST_WIDE_INT offset = 0;
12907 Mnode *mp;
12908
12909 minipool_barrier = barrier;
12910
12911 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12912 {
12913 mp->offset = offset;
12914
12915 if (mp->refcount > 0)
12916 offset += mp->fix_size;
12917 }
12918 }
12919
12920 /* Output the literal table.  */
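/* Very roughly, and with the exact directives supplied by the
   consttable_* patterns in the machine description, the emitted pool
   has the shape (label names illustrative):

       <fresh label>
       .align  2                 @ or 3 when any live entry needs 8 bytes
   .LCPOOL:                      @ minipool_vector_label
       .word   <4-byte entry>
       <8- and 16-byte entries likewise, in pool order>
       @ consttable_end, followed by a barrier

   Entries whose refcount has dropped to zero take no space and are
   simply freed.  */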
12921 static void
12922 dump_minipool (rtx scan)
12923 {
12924 Mnode * mp;
12925 Mnode * nmp;
12926 int align64 = 0;
12927
12928 if (ARM_DOUBLEWORD_ALIGN)
12929 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12930 if (mp->refcount > 0 && mp->fix_size >= 8)
12931 {
12932 align64 = 1;
12933 break;
12934 }
12935
12936 if (dump_file)
12937 fprintf (dump_file,
12938 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
12939 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
12940
12941 scan = emit_label_after (gen_label_rtx (), scan);
12942 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
12943 scan = emit_label_after (minipool_vector_label, scan);
12944
12945 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
12946 {
12947 if (mp->refcount > 0)
12948 {
12949 if (dump_file)
12950 {
12951 fprintf (dump_file,
12952 ";; Offset %u, min %ld, max %ld ",
12953 (unsigned) mp->offset, (unsigned long) mp->min_address,
12954 (unsigned long) mp->max_address);
12955 arm_print_value (dump_file, mp->value);
12956 fputc ('\n', dump_file);
12957 }
12958
12959 switch (mp->fix_size)
12960 {
12961 #ifdef HAVE_consttable_1
12962 case 1:
12963 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
12964 break;
12965
12966 #endif
12967 #ifdef HAVE_consttable_2
12968 case 2:
12969 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
12970 break;
12971
12972 #endif
12973 #ifdef HAVE_consttable_4
12974 case 4:
12975 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
12976 break;
12977
12978 #endif
12979 #ifdef HAVE_consttable_8
12980 case 8:
12981 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
12982 break;
12983
12984 #endif
12985 #ifdef HAVE_consttable_16
12986 case 16:
12987 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
12988 break;
12989
12990 #endif
12991 default:
12992 gcc_unreachable ();
12993 }
12994 }
12995
12996 nmp = mp->next;
12997 free (mp);
12998 }
12999
13000 minipool_vector_head = minipool_vector_tail = NULL;
13001 scan = emit_insn_after (gen_consttable_end (), scan);
13002 scan = emit_barrier_after (scan);
13003 }
13004
13005 /* Return the cost of forcibly inserting a barrier after INSN. */
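/* For example, a JUMP_INSN that is immediately followed by a label
   costs 50 - 20 - 10 = 20, making it one of the cheapest places to
   break the insn stream, while a CODE_LABEL itself always costs 50
   because the table is better placed before the label.  */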
13006 static int
13007 arm_barrier_cost (rtx insn)
13008 {
13009 /* Basing the location of the pool on the loop depth is preferable,
13010 but at the moment, the basic block information seems to be
13011 corrupt by this stage of the compilation. */
13012 int base_cost = 50;
13013 rtx next = next_nonnote_insn (insn);
13014
13015 if (next != NULL && LABEL_P (next))
13016 base_cost -= 20;
13017
13018 switch (GET_CODE (insn))
13019 {
13020 case CODE_LABEL:
13021 /* It will always be better to place the table before the label, rather
13022 than after it. */
13023 return 50;
13024
13025 case INSN:
13026 case CALL_INSN:
13027 return base_cost;
13028
13029 case JUMP_INSN:
13030 return base_cost - 10;
13031
13032 default:
13033 return base_cost + 10;
13034 }
13035 }
13036
13037 /* Find the best place in the insn stream in the range
13038 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
13039 Create the barrier by inserting a jump and adding a new fix entry for
13040 it. */
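/* The inserted sequence has the shape (label name illustrative):

         b       .LSKIP          @ jump around the future pool
         @ barrier -- dump_minipool will emit the literal pool here
     .LSKIP:

   so execution falls past the pool while the constants stay within
   range of the loads that reference them.  */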
13041 static Mfix *
13042 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
13043 {
13044 HOST_WIDE_INT count = 0;
13045 rtx barrier;
13046 rtx from = fix->insn;
13047 /* The instruction after which we will insert the jump. */
13048 rtx selected = NULL;
13049 int selected_cost;
13050 /* The address at which the jump instruction will be placed. */
13051 HOST_WIDE_INT selected_address;
13052 Mfix * new_fix;
13053 HOST_WIDE_INT max_count = max_address - fix->address;
13054 rtx label = gen_label_rtx ();
13055
13056 selected_cost = arm_barrier_cost (from);
13057 selected_address = fix->address;
13058
13059 while (from && count < max_count)
13060 {
13061 rtx tmp;
13062 int new_cost;
13063
13064 /* This code shouldn't have been called if there was a natural barrier
13065 within range. */
13066 gcc_assert (!BARRIER_P (from));
13067
13068 /* Count the length of this insn. This must stay in sync with the
13069 code that pushes minipool fixes. */
13070 if (LABEL_P (from))
13071 count += get_label_padding (from);
13072 else
13073 count += get_attr_length (from);
13074
13075 /* If there is a jump table, add its length. */
13076 tmp = is_jump_table (from);
13077 if (tmp != NULL)
13078 {
13079 count += get_jump_table_size (tmp);
13080
13081 /* Jump tables aren't in a basic block, so base the cost on
13082 the dispatch insn. If we select this location, we will
13083 still put the pool after the table. */
13084 new_cost = arm_barrier_cost (from);
13085
13086 if (count < max_count
13087 && (!selected || new_cost <= selected_cost))
13088 {
13089 selected = tmp;
13090 selected_cost = new_cost;
13091 selected_address = fix->address + count;
13092 }
13093
13094 /* Continue after the dispatch table. */
13095 from = NEXT_INSN (tmp);
13096 continue;
13097 }
13098
13099 new_cost = arm_barrier_cost (from);
13100
13101 if (count < max_count
13102 && (!selected || new_cost <= selected_cost))
13103 {
13104 selected = from;
13105 selected_cost = new_cost;
13106 selected_address = fix->address + count;
13107 }
13108
13109 from = NEXT_INSN (from);
13110 }
13111
13112 /* Make sure that we found a place to insert the jump. */
13113 gcc_assert (selected);
13114
13115 /* Make sure we do not split a call and its corresponding
13116 CALL_ARG_LOCATION note. */
13117 if (CALL_P (selected))
13118 {
13119 rtx next = NEXT_INSN (selected);
13120 if (next && NOTE_P (next)
13121 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
13122 selected = next;
13123 }
13124
13125 /* Create a new JUMP_INSN that branches around a barrier. */
13126 from = emit_jump_insn_after (gen_jump (label), selected);
13127 JUMP_LABEL (from) = label;
13128 barrier = emit_barrier_after (from);
13129 emit_label_after (label, barrier);
13130
13131 /* Create a minipool barrier entry for the new barrier. */
13132 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
13133 new_fix->insn = barrier;
13134 new_fix->address = selected_address;
13135 new_fix->next = fix->next;
13136 fix->next = new_fix;
13137
13138 return new_fix;
13139 }
13140
13141 /* Record that there is a natural barrier in the insn stream at
13142 ADDRESS. */
13143 static void
13144 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
13145 {
13146 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13147
13148 fix->insn = insn;
13149 fix->address = address;
13150
13151 fix->next = NULL;
13152 if (minipool_fix_head != NULL)
13153 minipool_fix_tail->next = fix;
13154 else
13155 minipool_fix_head = fix;
13156
13157 minipool_fix_tail = fix;
13158 }
13159
13160 /* Record INSN, which will need fixing up to load a value from the
13161 minipool. ADDRESS is the offset of the insn since the start of the
13162 function; LOC is a pointer to the part of the insn which requires
13163 fixing; VALUE is the constant that must be loaded, which is of type
13164 MODE. */
13165 static void
13166 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
13167 enum machine_mode mode, rtx value)
13168 {
13169 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13170
13171 fix->insn = insn;
13172 fix->address = address;
13173 fix->loc = loc;
13174 fix->mode = mode;
13175 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
13176 fix->value = value;
13177 fix->forwards = get_attr_pool_range (insn);
13178 fix->backwards = get_attr_neg_pool_range (insn);
13179 fix->minipool = NULL;
13180
13181 /* If an insn doesn't have a range defined for it, then it isn't
13182 expecting to be reworked by this code. Better to stop now than
13183 to generate duff assembly code. */
13184 gcc_assert (fix->forwards || fix->backwards);
13185
13186 /* If an entry requires 8-byte alignment then assume all constant pools
13187 require 4 bytes of padding. Trying to do this later on a per-pool
13188 basis is awkward because existing pool entries have to be modified. */
13189 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
13190 minipool_pad = 4;
13191
13192 if (dump_file)
13193 {
13194 fprintf (dump_file,
13195 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
13196 GET_MODE_NAME (mode),
13197 INSN_UID (insn), (unsigned long) address,
13198 -1 * (long)fix->backwards, (long)fix->forwards);
13199 arm_print_value (dump_file, fix->value);
13200 fprintf (dump_file, "\n");
13201 }
13202
13203 /* Add it to the chain of fixes. */
13204 fix->next = NULL;
13205
13206 if (minipool_fix_head != NULL)
13207 minipool_fix_tail->next = fix;
13208 else
13209 minipool_fix_head = fix;
13210
13211 minipool_fix_tail = fix;
13212 }
13213
13214 /* Return the cost of synthesizing a 64-bit constant VAL inline.
13215 Returns the number of insns needed, or 99 if we don't know how to
13216 do it. */
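/* As an illustration, the DImode constant 0x0000000100000001 splits
   into a low part of 1 and a high part of 1; each part is a valid ARM
   immediate, so the cost would be 1 + 1 = 2 insns.  The exact counts
   come from arm_gen_constant.  */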
13217 int
13218 arm_const_double_inline_cost (rtx val)
13219 {
13220 rtx lowpart, highpart;
13221 enum machine_mode mode;
13222
13223 mode = GET_MODE (val);
13224
13225 if (mode == VOIDmode)
13226 mode = DImode;
13227
13228 gcc_assert (GET_MODE_SIZE (mode) == 8);
13229
13230 lowpart = gen_lowpart (SImode, val);
13231 highpart = gen_highpart_mode (SImode, mode, val);
13232
13233 gcc_assert (CONST_INT_P (lowpart));
13234 gcc_assert (CONST_INT_P (highpart));
13235
13236 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
13237 NULL_RTX, NULL_RTX, 0, 0)
13238 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
13239 NULL_RTX, NULL_RTX, 0, 0));
13240 }
13241
13242 /* Return true if it is worthwhile to split a 64-bit constant into two
13243 32-bit operations. This is the case if optimizing for size, or
13244 if we have load delay slots, or if one 32-bit part can be done with
13245 a single data operation. */
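/* For instance, even when not optimizing for size and without load
   delay slots, a constant such as 0x0000000012345678 is still split
   by parts, because its high part (zero) is a valid data-processing
   immediate although its low part is not.  */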
13246 bool
13247 arm_const_double_by_parts (rtx val)
13248 {
13249 enum machine_mode mode = GET_MODE (val);
13250 rtx part;
13251
13252 if (optimize_size || arm_ld_sched)
13253 return true;
13254
13255 if (mode == VOIDmode)
13256 mode = DImode;
13257
13258 part = gen_highpart_mode (SImode, mode, val);
13259
13260 gcc_assert (CONST_INT_P (part));
13261
13262 if (const_ok_for_arm (INTVAL (part))
13263 || const_ok_for_arm (~INTVAL (part)))
13264 return true;
13265
13266 part = gen_lowpart (SImode, val);
13267
13268 gcc_assert (CONST_INT_P (part));
13269
13270 if (const_ok_for_arm (INTVAL (part))
13271 || const_ok_for_arm (~INTVAL (part)))
13272 return true;
13273
13274 return false;
13275 }
13276
13277 /* Return true if it is possible to inline both the high and low parts
13278 of a 64-bit constant into 32-bit data processing instructions. */
13279 bool
13280 arm_const_double_by_immediates (rtx val)
13281 {
13282 enum machine_mode mode = GET_MODE (val);
13283 rtx part;
13284
13285 if (mode == VOIDmode)
13286 mode = DImode;
13287
13288 part = gen_highpart_mode (SImode, mode, val);
13289
13290 gcc_assert (CONST_INT_P (part));
13291
13292 if (!const_ok_for_arm (INTVAL (part)))
13293 return false;
13294
13295 part = gen_lowpart (SImode, val);
13296
13297 gcc_assert (CONST_INT_P (part));
13298
13299 if (!const_ok_for_arm (INTVAL (part)))
13300 return false;
13301
13302 return true;
13303 }
13304
13305 /* Scan INSN and note any of its operands that need fixing.
13306 If DO_PUSHES is false we do not actually push any of the fixups
13307 needed. */
13308 static void
13309 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
13310 {
13311 int opno;
13312
13313 extract_insn (insn);
13314
13315 if (!constrain_operands (1))
13316 fatal_insn_not_found (insn);
13317
13318 if (recog_data.n_alternatives == 0)
13319 return;
13320
13321 /* Fill in recog_op_alt with information about the constraints of
13322 this insn. */
13323 preprocess_constraints ();
13324
13325 for (opno = 0; opno < recog_data.n_operands; opno++)
13326 {
13327 /* Things we need to fix can only occur in inputs. */
13328 if (recog_data.operand_type[opno] != OP_IN)
13329 continue;
13330
13331 /* If this alternative is a memory reference, then any mention
13332 of constants in this alternative is really to fool reload
13333 into allowing us to accept one there. We need to fix them up
13334 now so that we output the right code. */
13335 if (recog_op_alt[opno][which_alternative].memory_ok)
13336 {
13337 rtx op = recog_data.operand[opno];
13338
13339 if (CONSTANT_P (op))
13340 {
13341 if (do_pushes)
13342 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
13343 recog_data.operand_mode[opno], op);
13344 }
13345 else if (MEM_P (op)
13346 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
13347 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
13348 {
13349 if (do_pushes)
13350 {
13351 rtx cop = avoid_constant_pool_reference (op);
13352
13353 /* Casting the address of something to a mode narrower
13354 than a word can cause avoid_constant_pool_reference()
13355 to return the pool reference itself. That's no good to
13356 us here. Let's just hope that we can use the
13357 constant pool value directly. */
13358 if (op == cop)
13359 cop = get_pool_constant (XEXP (op, 0));
13360
13361 push_minipool_fix (insn, address,
13362 recog_data.operand_loc[opno],
13363 recog_data.operand_mode[opno], cop);
13364 }
13365
13366 }
13367 }
13368 }
13369
13370 return;
13371 }
13372
13373 /* Convert instructions to their cc-clobbering variant if possible, since
13374 that allows us to use smaller encodings. */
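/* Schematically, a plain SET such as

     (set (reg:SI r0) (plus:SI (reg:SI r0) (reg:SI r1)))

   is rewritten into

     (parallel [(set (reg:SI r0) (plus:SI (reg:SI r0) (reg:SI r1)))
                (clobber (reg:CC CC_REGNUM))])

   when the condition-code register is dead at that point and the
   operands allow it, so that the 16-bit flag-setting encoding (ADDS
   in this case) can be selected.  */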
13375
13376 static void
13377 thumb2_reorg (void)
13378 {
13379 basic_block bb;
13380 regset_head live;
13381
13382 INIT_REG_SET (&live);
13383
13384 /* We are freeing block_for_insn in the toplev to keep compatibility
13385 with old MDEP_REORGS that are not CFG based. Recompute it now. */
13386 compute_bb_for_insn ();
13387 df_analyze ();
13388
13389 FOR_EACH_BB (bb)
13390 {
13391 rtx insn;
13392
13393 COPY_REG_SET (&live, DF_LR_OUT (bb));
13394 df_simulate_initialize_backwards (bb, &live);
13395 FOR_BB_INSNS_REVERSE (bb, insn)
13396 {
13397 if (NONJUMP_INSN_P (insn)
13398 && !REGNO_REG_SET_P (&live, CC_REGNUM)
13399 && GET_CODE (PATTERN (insn)) == SET)
13400 {
13401 enum {SKIP, CONV, SWAP_CONV} action = SKIP;
13402 rtx pat = PATTERN (insn);
13403 rtx dst = XEXP (pat, 0);
13404 rtx src = XEXP (pat, 1);
13405 rtx op0 = NULL_RTX, op1 = NULL_RTX;
13406
13407 if (!OBJECT_P (src))
13408 op0 = XEXP (src, 0);
13409
13410 if (BINARY_P (src))
13411 op1 = XEXP (src, 1);
13412
13413 if (low_register_operand (dst, SImode))
13414 {
13415 switch (GET_CODE (src))
13416 {
13417 case PLUS:
13418 /* Adding two registers and storing the result
13419 in the first source is already a 16-bit
13420 operation. */
13421 if (rtx_equal_p (dst, op0)
13422 && register_operand (op1, SImode))
13423 break;
13424
13425 if (low_register_operand (op0, SImode))
13426 {
13427 /* ADDS <Rd>,<Rn>,<Rm> */
13428 if (low_register_operand (op1, SImode))
13429 action = CONV;
13430 /* ADDS <Rdn>,#<imm8> */
13431 /* SUBS <Rdn>,#<imm8> */
13432 else if (rtx_equal_p (dst, op0)
13433 && CONST_INT_P (op1)
13434 && IN_RANGE (INTVAL (op1), -255, 255))
13435 action = CONV;
13436 /* ADDS <Rd>,<Rn>,#<imm3> */
13437 /* SUBS <Rd>,<Rn>,#<imm3> */
13438 else if (CONST_INT_P (op1)
13439 && IN_RANGE (INTVAL (op1), -7, 7))
13440 action = CONV;
13441 }
13442 break;
13443
13444 case MINUS:
13445 /* RSBS <Rd>,<Rn>,#0
13446 Not handled here: see NEG below. */
13447 /* SUBS <Rd>,<Rn>,#<imm3>
13448 SUBS <Rdn>,#<imm8>
13449 Not handled here: see PLUS above. */
13450 /* SUBS <Rd>,<Rn>,<Rm> */
13451 if (low_register_operand (op0, SImode)
13452 && low_register_operand (op1, SImode))
13453 action = CONV;
13454 break;
13455
13456 case MULT:
13457 /* MULS <Rdm>,<Rn>,<Rdm>
13458 As an exception to the rule, this is only used
13459 when optimizing for size since MULS is slow on all
13460 known implementations. We do not even want to use
13461 MULS in cold code, if optimizing for speed, so we
13462 test the global flag here. */
13463 if (!optimize_size)
13464 break;
13465 /* else fall through. */
13466 case AND:
13467 case IOR:
13468 case XOR:
13469 /* ANDS <Rdn>,<Rm> */
13470 if (rtx_equal_p (dst, op0)
13471 && low_register_operand (op1, SImode))
13472 action = CONV;
13473 else if (rtx_equal_p (dst, op1)
13474 && low_register_operand (op0, SImode))
13475 action = SWAP_CONV;
13476 break;
13477
13478 case ASHIFTRT:
13479 case ASHIFT:
13480 case LSHIFTRT:
13481 /* ASRS <Rdn>,<Rm> */
13482 /* LSRS <Rdn>,<Rm> */
13483 /* LSLS <Rdn>,<Rm> */
13484 if (rtx_equal_p (dst, op0)
13485 && low_register_operand (op1, SImode))
13486 action = CONV;
13487 /* ASRS <Rd>,<Rm>,#<imm5> */
13488 /* LSRS <Rd>,<Rm>,#<imm5> */
13489 /* LSLS <Rd>,<Rm>,#<imm5> */
13490 else if (low_register_operand (op0, SImode)
13491 && CONST_INT_P (op1)
13492 && IN_RANGE (INTVAL (op1), 0, 31))
13493 action = CONV;
13494 break;
13495
13496 case ROTATERT:
13497 /* RORS <Rdn>,<Rm> */
13498 if (rtx_equal_p (dst, op0)
13499 && low_register_operand (op1, SImode))
13500 action = CONV;
13501 break;
13502
13503 case NOT:
13504 case NEG:
13505 /* MVNS <Rd>,<Rm> */
13506 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
13507 if (low_register_operand (op0, SImode))
13508 action = CONV;
13509 break;
13510
13511 case CONST_INT:
13512 /* MOVS <Rd>,#<imm8> */
13513 if (CONST_INT_P (src)
13514 && IN_RANGE (INTVAL (src), 0, 255))
13515 action = CONV;
13516 break;
13517
13518 case REG:
13519 /* MOVS and MOV<c> with registers have different
13520 encodings, so are not relevant here. */
13521 break;
13522
13523 default:
13524 break;
13525 }
13526 }
13527
13528 if (action != SKIP)
13529 {
13530 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13531 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13532 rtvec vec;
13533
13534 if (action == SWAP_CONV)
13535 {
13536 src = copy_rtx (src);
13537 XEXP (src, 0) = op1;
13538 XEXP (src, 1) = op0;
13539 pat = gen_rtx_SET (VOIDmode, dst, src);
13540 vec = gen_rtvec (2, pat, clobber);
13541 }
13542 else /* action == CONV */
13543 vec = gen_rtvec (2, pat, clobber);
13544
13545 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13546 INSN_CODE (insn) = -1;
13547 }
13548 }
13549
13550 if (NONDEBUG_INSN_P (insn))
13551 df_simulate_one_insn_backwards (bb, insn, &live);
13552 }
13553 }
13554
13555 CLEAR_REG_SET (&live);
13556 }
13557
13558 /* GCC puts the pool in the wrong place for ARM, since we can only
13559 load addresses a limited distance around the pc. We do some
13560 special munging to move the constant pool values to the correct
13561 point in the code. */
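/* In outline: each constant that has to be loaded with a pc-relative
   load is recorded as a "fix"; the fixes are then grouped into
   "minipools" that are dumped at points in the insn stream within the
   pool range of every insn referring to them, and a branch is emitted
   around the pool wherever no natural barrier is available (see
   create_fix_barrier and dump_minipool above).  */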
13562 static void
13563 arm_reorg (void)
13564 {
13565 rtx insn;
13566 HOST_WIDE_INT address = 0;
13567 Mfix * fix;
13568
13569 if (TARGET_THUMB2)
13570 thumb2_reorg ();
13571
13572 /* Ensure all insns that must be split have been split at this point.
13573 Otherwise, the pool placement code below may compute incorrect
13574 insn lengths. Note that when optimizing, all insns have already
13575 been split at this point. */
13576 if (!optimize)
13577 split_all_insns_noflow ();
13578
13579 minipool_fix_head = minipool_fix_tail = NULL;
13580
13581 /* The first insn must always be a note, or the code below won't
13582 scan it properly. */
13583 insn = get_insns ();
13584 gcc_assert (NOTE_P (insn));
13585 minipool_pad = 0;
13586
13587 /* Scan all the insns and record the operands that will need fixing. */
13588 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
13589 {
13590 if (BARRIER_P (insn))
13591 push_minipool_barrier (insn, address);
13592 else if (INSN_P (insn))
13593 {
13594 rtx table;
13595
13596 note_invalid_constants (insn, address, true);
13597 address += get_attr_length (insn);
13598
13599 /* If the insn is a vector jump, add the size of the table
13600 and skip the table. */
13601 if ((table = is_jump_table (insn)) != NULL)
13602 {
13603 address += get_jump_table_size (table);
13604 insn = table;
13605 }
13606 }
13607 else if (LABEL_P (insn))
13608 /* Add the worst-case padding due to alignment. We don't add
13609 the _current_ padding because the minipool insertions
13610 themselves might change it. */
13611 address += get_label_padding (insn);
13612 }
13613
13614 fix = minipool_fix_head;
13615
13616 /* Now scan the fixups and perform the required changes. */
13617 while (fix)
13618 {
13619 Mfix * ftmp;
13620 Mfix * fdel;
13621 Mfix * last_added_fix;
13622 Mfix * last_barrier = NULL;
13623 Mfix * this_fix;
13624
13625 /* Skip any further barriers before the next fix. */
13626 while (fix && BARRIER_P (fix->insn))
13627 fix = fix->next;
13628
13629 /* No more fixes. */
13630 if (fix == NULL)
13631 break;
13632
13633 last_added_fix = NULL;
13634
13635 for (ftmp = fix; ftmp; ftmp = ftmp->next)
13636 {
13637 if (BARRIER_P (ftmp->insn))
13638 {
13639 if (ftmp->address >= minipool_vector_head->max_address)
13640 break;
13641
13642 last_barrier = ftmp;
13643 }
13644 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
13645 break;
13646
13647 last_added_fix = ftmp; /* Keep track of the last fix added. */
13648 }
13649
13650 /* If we found a barrier, drop back to that; any fixes that we
13651 could have reached but come after the barrier will now go in
13652 the next mini-pool. */
13653 if (last_barrier != NULL)
13654 {
13655 /* Reduce the refcount for those fixes that won't go into this
13656 pool after all. */
13657 for (fdel = last_barrier->next;
13658 fdel && fdel != ftmp;
13659 fdel = fdel->next)
13660 {
13661 fdel->minipool->refcount--;
13662 fdel->minipool = NULL;
13663 }
13664
13665 ftmp = last_barrier;
13666 }
13667 else
13668 {
13669 /* ftmp is the first fix that we can't fit into this pool and
13670 there are no natural barriers that we could use. Insert a
13671 new barrier in the code somewhere between the previous
13672 fix and this one, and arrange to jump around it. */
13673 HOST_WIDE_INT max_address;
13674
13675 /* The last item on the list of fixes must be a barrier, so
13676 we can never run off the end of the list of fixes without
13677 last_barrier being set. */
13678 gcc_assert (ftmp);
13679
13680 max_address = minipool_vector_head->max_address;
13681 /* Check that there isn't another fix that is in range that
13682 we couldn't fit into this pool because the pool was
13683 already too large: we need to put the pool before such an
13684 instruction. The pool itself may come just after the
13685 fix because create_fix_barrier also allows space for a
13686 jump instruction. */
13687 if (ftmp->address < max_address)
13688 max_address = ftmp->address + 1;
13689
13690 last_barrier = create_fix_barrier (last_added_fix, max_address);
13691 }
13692
13693 assign_minipool_offsets (last_barrier);
13694
13695 while (ftmp)
13696 {
13697 if (!BARRIER_P (ftmp->insn)
13698 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
13699 == NULL))
13700 break;
13701
13702 ftmp = ftmp->next;
13703 }
13704
13705 /* Scan over the fixes we have identified for this pool, fixing them
13706 up and adding the constants to the pool itself. */
13707 for (this_fix = fix; this_fix && ftmp != this_fix;
13708 this_fix = this_fix->next)
13709 if (!BARRIER_P (this_fix->insn))
13710 {
13711 rtx addr
13712 = plus_constant (Pmode,
13713 gen_rtx_LABEL_REF (VOIDmode,
13714 minipool_vector_label),
13715 this_fix->minipool->offset);
13716 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
13717 }
13718
13719 dump_minipool (last_barrier->insn);
13720 fix = ftmp;
13721 }
13722
13723 /* From now on we must synthesize any constants that we can't handle
13724 directly. This can happen if the RTL gets split during final
13725 instruction generation. */
13726 after_arm_reorg = 1;
13727
13728 /* Free the minipool memory. */
13729 obstack_free (&minipool_obstack, minipool_startobj);
13730 }
13731 \f
13732 /* Routines to output assembly language. */
13733
13734 /* If the rtx is the correct value then return the string of the number.
13735 In this way we can ensure that valid double constants are generated even
13736 when cross compiling. */
13737 const char *
13738 fp_immediate_constant (rtx x)
13739 {
13740 REAL_VALUE_TYPE r;
13741
13742 if (!fp_consts_inited)
13743 init_fp_table ();
13744
13745 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13746
13747 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
13748 return "0";
13749 }
13750
13751 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
13752 static const char *
13753 fp_const_from_val (REAL_VALUE_TYPE *r)
13754 {
13755 if (!fp_consts_inited)
13756 init_fp_table ();
13757
13758 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
13759 return "0";
13760 }
13761
13762 /* OPERANDS[0] is the entire list of insns that constitute pop,
13763 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
13764 is in the list, UPDATE is true iff the list contains explicit
13765 update of base register. */
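/* Typical outputs (register lists illustrative):

     pop     {r4, r5, pc}        @ unified syntax, base register is SP
     ldmfd   sp!, {r4, r5, pc}   @ divided syntax, base register is SP
     ldmia   r7, {r4, r5, r6}    @ base register is not SP

   with a trailing "^" appended when returning from an interrupt
   handler with the PC in the list.  */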
13766 void
13767 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
13768 bool update)
13769 {
13770 int i;
13771 char pattern[100];
13772 int offset;
13773 const char *conditional;
13774 int num_saves = XVECLEN (operands[0], 0);
13775 unsigned int regno;
13776 unsigned int regno_base = REGNO (operands[1]);
13777
13778 offset = 0;
13779 offset += update ? 1 : 0;
13780 offset += return_pc ? 1 : 0;
13781
13782 /* Is the base register in the list? */
13783 for (i = offset; i < num_saves; i++)
13784 {
13785 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
13786 /* If SP is in the list, then the base register must be SP. */
13787 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
13788 /* If base register is in the list, there must be no explicit update. */
13789 if (regno == regno_base)
13790 gcc_assert (!update);
13791 }
13792
13793 conditional = reverse ? "%?%D0" : "%?%d0";
13794 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
13795 {
13796 /* Output pop (not stmfd) because it has a shorter encoding. */
13797 gcc_assert (update);
13798 sprintf (pattern, "pop%s\t{", conditional);
13799 }
13800 else
13801 {
13802 /* Output ldmfd when the base register is SP, otherwise output ldmia.
13803 It's just a convention; their semantics are identical. */
13804 if (regno_base == SP_REGNUM)
13805 sprintf (pattern, "ldm%sfd\t", conditional);
13806 else if (TARGET_UNIFIED_ASM)
13807 sprintf (pattern, "ldmia%s\t", conditional);
13808 else
13809 sprintf (pattern, "ldm%sia\t", conditional);
13810
13811 strcat (pattern, reg_names[regno_base]);
13812 if (update)
13813 strcat (pattern, "!, {");
13814 else
13815 strcat (pattern, ", {");
13816 }
13817
13818 /* Output the first destination register. */
13819 strcat (pattern,
13820 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
13821
13822 /* Output the rest of the destination registers. */
13823 for (i = offset + 1; i < num_saves; i++)
13824 {
13825 strcat (pattern, ", ");
13826 strcat (pattern,
13827 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
13828 }
13829
13830 strcat (pattern, "}");
13831
13832 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
13833 strcat (pattern, "^");
13834
13835 output_asm_insn (pattern, &cond);
13836 }
13837
13838
13839 /* Output the assembly for a store multiple. */
13840
13841 const char *
13842 vfp_output_fstmd (rtx * operands)
13843 {
13844 char pattern[100];
13845 int p;
13846 int base;
13847 int i;
13848
13849 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
13850 p = strlen (pattern);
13851
13852 gcc_assert (REG_P (operands[1]));
13853
13854 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
13855 for (i = 1; i < XVECLEN (operands[2], 0); i++)
13856 {
13857 p += sprintf (&pattern[p], ", d%d", base + i);
13858 }
13859 strcpy (&pattern[p], "}");
13860
13861 output_asm_insn (pattern, operands);
13862 return "";
13863 }
13864
13865
13866 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
13867 number of bytes pushed. */
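/* For example, saving d8-d11 (COUNT == 4) emits RTL that is later
   output as something like

     fstmfdd sp!, {d8, d9, d10, d11}

   and the function returns 4 * 8 == 32 bytes.  Because of the ARM10
   VFPr1 erratum mentioned below, a request for exactly two pairs is
   padded to three on pre-v6 cores.  */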
13868
13869 static int
13870 vfp_emit_fstmd (int base_reg, int count)
13871 {
13872 rtx par;
13873 rtx dwarf;
13874 rtx tmp, reg;
13875 int i;
13876
13877 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
13878 register pairs are stored by a store multiple insn. We avoid this
13879 by pushing an extra pair. */
13880 if (count == 2 && !arm_arch6)
13881 {
13882 if (base_reg == LAST_VFP_REGNUM - 3)
13883 base_reg -= 2;
13884 count++;
13885 }
13886
13887 /* FSTMD may not store more than 16 doubleword registers at once. Split
13888 larger stores into multiple parts (up to a maximum of two, in
13889 practice). */
13890 if (count > 16)
13891 {
13892 int saved;
13893 /* NOTE: base_reg is an internal register number, so each D register
13894 counts as 2. */
13895 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
13896 saved += vfp_emit_fstmd (base_reg, 16);
13897 return saved;
13898 }
13899
13900 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
13901 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
13902
13903 reg = gen_rtx_REG (DFmode, base_reg);
13904 base_reg += 2;
13905
13906 XVECEXP (par, 0, 0)
13907 = gen_rtx_SET (VOIDmode,
13908 gen_frame_mem
13909 (BLKmode,
13910 gen_rtx_PRE_MODIFY (Pmode,
13911 stack_pointer_rtx,
13912 plus_constant
13913 (Pmode, stack_pointer_rtx,
13914 - (count * 8)))
13915 ),
13916 gen_rtx_UNSPEC (BLKmode,
13917 gen_rtvec (1, reg),
13918 UNSPEC_PUSH_MULT));
13919
13920 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13921 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
13922 RTX_FRAME_RELATED_P (tmp) = 1;
13923 XVECEXP (dwarf, 0, 0) = tmp;
13924
13925 tmp = gen_rtx_SET (VOIDmode,
13926 gen_frame_mem (DFmode, stack_pointer_rtx),
13927 reg);
13928 RTX_FRAME_RELATED_P (tmp) = 1;
13929 XVECEXP (dwarf, 0, 1) = tmp;
13930
13931 for (i = 1; i < count; i++)
13932 {
13933 reg = gen_rtx_REG (DFmode, base_reg);
13934 base_reg += 2;
13935 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
13936
13937 tmp = gen_rtx_SET (VOIDmode,
13938 gen_frame_mem (DFmode,
13939 plus_constant (Pmode,
13940 stack_pointer_rtx,
13941 i * 8)),
13942 reg);
13943 RTX_FRAME_RELATED_P (tmp) = 1;
13944 XVECEXP (dwarf, 0, i + 1) = tmp;
13945 }
13946
13947 par = emit_insn (par);
13948 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
13949 RTX_FRAME_RELATED_P (par) = 1;
13950
13951 return count * 8;
13952 }
13953
13954 /* Emit a call instruction with pattern PAT. ADDR is the address of
13955 the call target. */
13956
13957 void
13958 arm_emit_call_insn (rtx pat, rtx addr)
13959 {
13960 rtx insn;
13961
13962 insn = emit_call_insn (pat);
13963
13964 /* The PIC register is live on entry to VxWorks PIC PLT entries.
13965 If the call might use such an entry, add a use of the PIC register
13966 to the instruction's CALL_INSN_FUNCTION_USAGE. */
13967 if (TARGET_VXWORKS_RTP
13968 && flag_pic
13969 && GET_CODE (addr) == SYMBOL_REF
13970 && (SYMBOL_REF_DECL (addr)
13971 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
13972 : !SYMBOL_REF_LOCAL_P (addr)))
13973 {
13974 require_pic_register ();
13975 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
13976 }
13977 }
13978
13979 /* Output a 'call' insn. */
13980 const char *
13981 output_call (rtx *operands)
13982 {
13983 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
13984
13985 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
13986 if (REGNO (operands[0]) == LR_REGNUM)
13987 {
13988 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
13989 output_asm_insn ("mov%?\t%0, %|lr", operands);
13990 }
13991
13992 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13993
13994 if (TARGET_INTERWORK || arm_arch4t)
13995 output_asm_insn ("bx%?\t%0", operands);
13996 else
13997 output_asm_insn ("mov%?\t%|pc, %0", operands);
13998
13999 return "";
14000 }
14001
14002 /* Output a 'call' insn that is a reference in memory. This is
14003 disabled for ARMv5, where a blx is preferred instead, since
14004 otherwise there's a significant performance overhead. */
14005 const char *
14006 output_call_mem (rtx *operands)
14007 {
14008 gcc_assert (!arm_arch5);
14009 if (TARGET_INTERWORK)
14010 {
14011 output_asm_insn ("ldr%?\t%|ip, %0", operands);
14012 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14013 output_asm_insn ("bx%?\t%|ip", operands);
14014 }
14015 else if (regno_use_in (LR_REGNUM, operands[0]))
14016 {
14017 /* LR is used in the memory address. We load the address in the
14018 first instruction. It's safe to use IP as the target of the
14019 load since the call will kill it anyway. */
14020 output_asm_insn ("ldr%?\t%|ip, %0", operands);
14021 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14022 if (arm_arch4t)
14023 output_asm_insn ("bx%?\t%|ip", operands);
14024 else
14025 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
14026 }
14027 else
14028 {
14029 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14030 output_asm_insn ("ldr%?\t%|pc, %0", operands);
14031 }
14032
14033 return "";
14034 }
14035
14036
14037 /* Output a move from ARM registers to ARM registers of a long double.
14038 OPERANDS[0] is the destination.
14039 OPERANDS[1] is the source. */
14040 const char *
14041 output_mov_long_double_arm_from_arm (rtx *operands)
14042 {
14043 /* We have to be careful here because the two might overlap. */
14044 int dest_start = REGNO (operands[0]);
14045 int src_start = REGNO (operands[1]);
14046 rtx ops[2];
14047 int i;
14048
14049 if (dest_start < src_start)
14050 {
14051 for (i = 0; i < 3; i++)
14052 {
14053 ops[0] = gen_rtx_REG (SImode, dest_start + i);
14054 ops[1] = gen_rtx_REG (SImode, src_start + i);
14055 output_asm_insn ("mov%?\t%0, %1", ops);
14056 }
14057 }
14058 else
14059 {
14060 for (i = 2; i >= 0; i--)
14061 {
14062 ops[0] = gen_rtx_REG (SImode, dest_start + i);
14063 ops[1] = gen_rtx_REG (SImode, src_start + i);
14064 output_asm_insn ("mov%?\t%0, %1", ops);
14065 }
14066 }
14067
14068 return "";
14069 }
14070
14071 void
14072 arm_emit_movpair (rtx dest, rtx src)
14073 {
14074 /* If the src is an immediate, simplify it. */
14075 if (CONST_INT_P (src))
14076 {
14077 HOST_WIDE_INT val = INTVAL (src);
14078 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
14079 if ((val >> 16) & 0x0000ffff)
14080 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
14081 GEN_INT (16)),
14082 GEN_INT ((val >> 16) & 0x0000ffff));
14083 return;
14084 }
14085 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
14086 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
14087 }
14088
14089 /* Output a move between double words. It must be REG<-MEM
14090 or MEM<-REG. */
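/* For the simplest case, a register-addressed load, this emits either
   an LDRD (when TARGET_LDRD) or

     ldmia   r2, {r0, r1}

   (register numbers illustrative); the corresponding STRD / STM forms
   handle the MEM<-REG direction.  Most of the function deals with
   auto-increment and offset addressing modes and with overlap between
   the base and destination registers.  */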
14091 const char *
14092 output_move_double (rtx *operands, bool emit, int *count)
14093 {
14094 enum rtx_code code0 = GET_CODE (operands[0]);
14095 enum rtx_code code1 = GET_CODE (operands[1]);
14096 rtx otherops[3];
14097 if (count)
14098 *count = 1;
14099
14100 /* The only case when this might happen is when
14101 you are looking at the length of a DImode instruction
14102 that has an invalid constant in it. */
14103 if (code0 == REG && code1 != MEM)
14104 {
14105 gcc_assert (!emit);
14106 *count = 2;
14107 return "";
14108 }
14109
14110 if (code0 == REG)
14111 {
14112 unsigned int reg0 = REGNO (operands[0]);
14113
14114 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
14115
14116 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
14117
14118 switch (GET_CODE (XEXP (operands[1], 0)))
14119 {
14120 case REG:
14121
14122 if (emit)
14123 {
14124 if (TARGET_LDRD
14125 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
14126 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
14127 else
14128 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14129 }
14130 break;
14131
14132 case PRE_INC:
14133 gcc_assert (TARGET_LDRD);
14134 if (emit)
14135 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
14136 break;
14137
14138 case PRE_DEC:
14139 if (emit)
14140 {
14141 if (TARGET_LDRD)
14142 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
14143 else
14144 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
14145 }
14146 break;
14147
14148 case POST_INC:
14149 if (emit)
14150 {
14151 if (TARGET_LDRD)
14152 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
14153 else
14154 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
14155 }
14156 break;
14157
14158 case POST_DEC:
14159 gcc_assert (TARGET_LDRD);
14160 if (emit)
14161 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
14162 break;
14163
14164 case PRE_MODIFY:
14165 case POST_MODIFY:
14166 /* Autoincrement addressing modes should never have overlapping
14167 base and destination registers, and overlapping index registers
14168 are already prohibited, so this doesn't need to worry about
14169 fix_cm3_ldrd. */
14170 otherops[0] = operands[0];
14171 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
14172 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
14173
14174 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
14175 {
14176 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
14177 {
14178 /* Registers overlap so split out the increment. */
14179 if (emit)
14180 {
14181 output_asm_insn ("add%?\t%1, %1, %2", otherops);
14182 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
14183 }
14184 if (count)
14185 *count = 2;
14186 }
14187 else
14188 {
14189 /* Use a single insn if we can.
14190 FIXME: IWMMXT allows offsets larger than ldrd can
14191 handle; fix these up with a pair of ldr. */
14192 if (TARGET_THUMB2
14193 || !CONST_INT_P (otherops[2])
14194 || (INTVAL (otherops[2]) > -256
14195 && INTVAL (otherops[2]) < 256))
14196 {
14197 if (emit)
14198 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
14199 }
14200 else
14201 {
14202 if (emit)
14203 {
14204 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
14205 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14206 }
14207 if (count)
14208 *count = 2;
14209
14210 }
14211 }
14212 }
14213 else
14214 {
14215 /* Use a single insn if we can.
14216 FIXME: IWMMXT allows offsets larger than ldrd can handle;
14217 fix these up with a pair of ldr. */
14218 if (TARGET_THUMB2
14219 || !CONST_INT_P (otherops[2])
14220 || (INTVAL (otherops[2]) > -256
14221 && INTVAL (otherops[2]) < 256))
14222 {
14223 if (emit)
14224 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
14225 }
14226 else
14227 {
14228 if (emit)
14229 {
14230 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14231 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
14232 }
14233 if (count)
14234 *count = 2;
14235 }
14236 }
14237 break;
14238
14239 case LABEL_REF:
14240 case CONST:
14241 /* We might be able to use ldrd %0, %1 here. However the range is
14242 different to ldr/adr, and it is broken on some ARMv7-M
14243 implementations. */
14244 /* Use the second register of the pair to avoid problematic
14245 overlap. */
14246 otherops[1] = operands[1];
14247 if (emit)
14248 output_asm_insn ("adr%?\t%0, %1", otherops);
14249 operands[1] = otherops[0];
14250 if (emit)
14251 {
14252 if (TARGET_LDRD)
14253 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14254 else
14255 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
14256 }
14257
14258 if (count)
14259 *count = 2;
14260 break;
14261
14262 /* ??? This needs checking for thumb2. */
14263 default:
14264 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
14265 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
14266 {
14267 otherops[0] = operands[0];
14268 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
14269 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
14270
14271 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
14272 {
14273 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
14274 {
14275 switch ((int) INTVAL (otherops[2]))
14276 {
14277 case -8:
14278 if (emit)
14279 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
14280 return "";
14281 case -4:
14282 if (TARGET_THUMB2)
14283 break;
14284 if (emit)
14285 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
14286 return "";
14287 case 4:
14288 if (TARGET_THUMB2)
14289 break;
14290 if (emit)
14291 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
14292 return "";
14293 }
14294 }
14295 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
14296 operands[1] = otherops[0];
14297 if (TARGET_LDRD
14298 && (REG_P (otherops[2])
14299 || TARGET_THUMB2
14300 || (CONST_INT_P (otherops[2])
14301 && INTVAL (otherops[2]) > -256
14302 && INTVAL (otherops[2]) < 256)))
14303 {
14304 if (reg_overlap_mentioned_p (operands[0],
14305 otherops[2]))
14306 {
14307 rtx tmp;
14308 /* Swap base and index registers over to
14309 avoid a conflict. */
14310 tmp = otherops[1];
14311 otherops[1] = otherops[2];
14312 otherops[2] = tmp;
14313 }
14314 /* If both registers conflict, it will usually
14315 have been fixed by a splitter. */
14316 if (reg_overlap_mentioned_p (operands[0], otherops[2])
14317 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
14318 {
14319 if (emit)
14320 {
14321 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14322 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14323 }
14324 if (count)
14325 *count = 2;
14326 }
14327 else
14328 {
14329 otherops[0] = operands[0];
14330 if (emit)
14331 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
14332 }
14333 return "";
14334 }
14335
14336 if (CONST_INT_P (otherops[2]))
14337 {
14338 if (emit)
14339 {
14340 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
14341 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
14342 else
14343 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14344 }
14345 }
14346 else
14347 {
14348 if (emit)
14349 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14350 }
14351 }
14352 else
14353 {
14354 if (emit)
14355 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
14356 }
14357
14358 if (count)
14359 *count = 2;
14360
14361 if (TARGET_LDRD)
14362 return "ldr%(d%)\t%0, [%1]";
14363
14364 return "ldm%(ia%)\t%1, %M0";
14365 }
14366 else
14367 {
14368 otherops[1] = adjust_address (operands[1], SImode, 4);
14369 /* Take care of overlapping base/data reg. */
14370 if (reg_mentioned_p (operands[0], operands[1]))
14371 {
14372 if (emit)
14373 {
14374 output_asm_insn ("ldr%?\t%0, %1", otherops);
14375 output_asm_insn ("ldr%?\t%0, %1", operands);
14376 }
14377 if (count)
14378 *count = 2;
14379
14380 }
14381 else
14382 {
14383 if (emit)
14384 {
14385 output_asm_insn ("ldr%?\t%0, %1", operands);
14386 output_asm_insn ("ldr%?\t%0, %1", otherops);
14387 }
14388 if (count)
14389 *count = 2;
14390 }
14391 }
14392 }
14393 }
14394 else
14395 {
14396 /* Constraints should ensure this. */
14397 gcc_assert (code0 == MEM && code1 == REG);
14398 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
14399
14400 switch (GET_CODE (XEXP (operands[0], 0)))
14401 {
14402 case REG:
14403 if (emit)
14404 {
14405 if (TARGET_LDRD)
14406 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
14407 else
14408 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14409 }
14410 break;
14411
14412 case PRE_INC:
14413 gcc_assert (TARGET_LDRD);
14414 if (emit)
14415 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
14416 break;
14417
14418 case PRE_DEC:
14419 if (emit)
14420 {
14421 if (TARGET_LDRD)
14422 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
14423 else
14424 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
14425 }
14426 break;
14427
14428 case POST_INC:
14429 if (emit)
14430 {
14431 if (TARGET_LDRD)
14432 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
14433 else
14434 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
14435 }
14436 break;
14437
14438 case POST_DEC:
14439 gcc_assert (TARGET_LDRD);
14440 if (emit)
14441 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
14442 break;
14443
14444 case PRE_MODIFY:
14445 case POST_MODIFY:
14446 otherops[0] = operands[1];
14447 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
14448 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
14449
14450 /* IWMMXT allows offsets larger than ldrd can handle;
14451 fix these up with a pair of ldr. */
14452 if (!TARGET_THUMB2
14453 && CONST_INT_P (otherops[2])
14454 && (INTVAL(otherops[2]) <= -256
14455 || INTVAL(otherops[2]) >= 256))
14456 {
14457 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14458 {
14459 if (emit)
14460 {
14461 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
14462 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14463 }
14464 if (count)
14465 *count = 2;
14466 }
14467 else
14468 {
14469 if (emit)
14470 {
14471 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14472 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
14473 }
14474 if (count)
14475 *count = 2;
14476 }
14477 }
14478 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14479 {
14480 if (emit)
14481 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
14482 }
14483 else
14484 {
14485 if (emit)
14486 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
14487 }
14488 break;
14489
14490 case PLUS:
14491 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
14492 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
14493 {
14494 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
14495 {
14496 case -8:
14497 if (emit)
14498 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
14499 return "";
14500
14501 case -4:
14502 if (TARGET_THUMB2)
14503 break;
14504 if (emit)
14505 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
14506 return "";
14507
14508 case 4:
14509 if (TARGET_THUMB2)
14510 break;
14511 if (emit)
14512 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
14513 return "";
14514 }
14515 }
14516 if (TARGET_LDRD
14517 && (REG_P (otherops[2])
14518 || TARGET_THUMB2
14519 || (CONST_INT_P (otherops[2])
14520 && INTVAL (otherops[2]) > -256
14521 && INTVAL (otherops[2]) < 256)))
14522 {
14523 otherops[0] = operands[1];
14524 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
14525 if (emit)
14526 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
14527 return "";
14528 }
14529 /* Fall through */
14530
14531 default:
14532 otherops[0] = adjust_address (operands[0], SImode, 4);
14533 otherops[1] = operands[1];
14534 if (emit)
14535 {
14536 output_asm_insn ("str%?\t%1, %0", operands);
14537 output_asm_insn ("str%?\t%H1, %0", otherops);
14538 }
14539 if (count)
14540 *count = 2;
14541 }
14542 }
14543
14544 return "";
14545 }
14546
14547 /* Output a move, load or store for quad-word vectors in ARM registers. Only
14548 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
14549
14550 const char *
14551 output_move_quad (rtx *operands)
14552 {
14553 if (REG_P (operands[0]))
14554 {
14555 /* Load, or reg->reg move. */
14556
14557 if (MEM_P (operands[1]))
14558 {
14559 switch (GET_CODE (XEXP (operands[1], 0)))
14560 {
14561 case REG:
14562 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14563 break;
14564
14565 case LABEL_REF:
14566 case CONST:
14567 output_asm_insn ("adr%?\t%0, %1", operands);
14568 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
14569 break;
14570
14571 default:
14572 gcc_unreachable ();
14573 }
14574 }
14575 else
14576 {
14577 rtx ops[2];
14578 int dest, src, i;
14579
14580 gcc_assert (REG_P (operands[1]));
14581
14582 dest = REGNO (operands[0]);
14583 src = REGNO (operands[1]);
14584
14585 /* This seems pretty dumb, but hopefully GCC won't try to do it
14586 very often. */
14587 if (dest < src)
14588 for (i = 0; i < 4; i++)
14589 {
14590 ops[0] = gen_rtx_REG (SImode, dest + i);
14591 ops[1] = gen_rtx_REG (SImode, src + i);
14592 output_asm_insn ("mov%?\t%0, %1", ops);
14593 }
14594 else
14595 for (i = 3; i >= 0; i--)
14596 {
14597 ops[0] = gen_rtx_REG (SImode, dest + i);
14598 ops[1] = gen_rtx_REG (SImode, src + i);
14599 output_asm_insn ("mov%?\t%0, %1", ops);
14600 }
14601 }
14602 }
14603 else
14604 {
14605 gcc_assert (MEM_P (operands[0]));
14606 gcc_assert (REG_P (operands[1]));
14607 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
14608
14609 switch (GET_CODE (XEXP (operands[0], 0)))
14610 {
14611 case REG:
14612 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14613 break;
14614
14615 default:
14616 gcc_unreachable ();
14617 }
14618 }
14619
14620 return "";
14621 }
14622
14623 /* Output a VFP load or store instruction. */
14624
14625 const char *
14626 output_move_vfp (rtx *operands)
14627 {
14628 rtx reg, mem, addr, ops[2];
14629 int load = REG_P (operands[0]);
14630 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
14631 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
14632 const char *templ;
14633 char buff[50];
14634 enum machine_mode mode;
14635
14636 reg = operands[!load];
14637 mem = operands[load];
14638
14639 mode = GET_MODE (reg);
14640
14641 gcc_assert (REG_P (reg));
14642 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
14643 gcc_assert (mode == SFmode
14644 || mode == DFmode
14645 || mode == SImode
14646 || mode == DImode
14647 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
14648 gcc_assert (MEM_P (mem));
14649
14650 addr = XEXP (mem, 0);
14651
14652 switch (GET_CODE (addr))
14653 {
14654 case PRE_DEC:
14655 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
14656 ops[0] = XEXP (addr, 0);
14657 ops[1] = reg;
14658 break;
14659
14660 case POST_INC:
14661 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
14662 ops[0] = XEXP (addr, 0);
14663 ops[1] = reg;
14664 break;
14665
14666 default:
14667 templ = "f%s%c%%?\t%%%s0, %%1%s";
14668 ops[0] = reg;
14669 ops[1] = mem;
14670 break;
14671 }
14672
14673 sprintf (buff, templ,
14674 load ? "ld" : "st",
14675 dp ? 'd' : 's',
14676 dp ? "P" : "",
14677 integer_p ? "\t%@ int" : "");
14678 output_asm_insn (buff, ops);
14679
14680 return "";
14681 }
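
/* As an illustration of the template expansion above: a DFmode load from a
   plain address falls into the default case and the sprintf produces
   "fldd%?\t%P0, %1", while an SFmode store through a post-incremented address
   becomes "fstmias%?\t%0!, {%1}".  For SImode or DImode values held in VFP
   registers the "\t%@ int" suffix simply appends an "@ int" comment to the
   emitted instruction.  */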
14682
14683 /* Output a Neon double-word or quad-word load or store, or a load
14684 or store for larger structure modes.
14685
14686 WARNING: The ordering of elements is weird in big-endian mode,
14687    because the EABI requires that vectors stored in memory appear
14688    as though they were stored by a VSTM instruction.
14689 GCC RTL defines element ordering based on in-memory order.
14690 This can be different from the architectural ordering of elements
14691 within a NEON register. The intrinsics defined in arm_neon.h use the
14692 NEON register element ordering, not the GCC RTL element ordering.
14693
14694    For example, the in-memory ordering of a big-endian quadword
14695 vector with 16-bit elements when stored from register pair {d0,d1}
14696 will be (lowest address first, d0[N] is NEON register element N):
14697
14698 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
14699
14700    When necessary, quadword registers (dN, dN+1) are moved to ARM
14701    registers, starting at rN, in the order:
14702
14703 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
14704
14705 So that STM/LDM can be used on vectors in ARM registers, and the
14706 same memory layout will result as if VSTM/VLDM were used.
14707
14708 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
14709 possible, which allows use of appropriate alignment tags.
14710 Note that the choice of "64" is independent of the actual vector
14711 element size; this size simply ensures that the behavior is
14712 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
14713
14714 Due to limitations of those instructions, use of VST1.64/VLD1.64
14715 is not possible if:
14716 - the address contains PRE_DEC, or
14717 - the mode refers to more than 4 double-word registers
14718
14719 In those cases, it would be possible to replace VSTM/VLDM by a
14720 sequence of instructions; this is not currently implemented since
14721 this is not certain to actually improve performance. */
14722
14723 const char *
14724 output_move_neon (rtx *operands)
14725 {
14726 rtx reg, mem, addr, ops[2];
14727 int regno, nregs, load = REG_P (operands[0]);
14728 const char *templ;
14729 char buff[50];
14730 enum machine_mode mode;
14731
14732 reg = operands[!load];
14733 mem = operands[load];
14734
14735 mode = GET_MODE (reg);
14736
14737 gcc_assert (REG_P (reg));
14738 regno = REGNO (reg);
14739 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
14740 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
14741 || NEON_REGNO_OK_FOR_QUAD (regno));
14742 gcc_assert (VALID_NEON_DREG_MODE (mode)
14743 || VALID_NEON_QREG_MODE (mode)
14744 || VALID_NEON_STRUCT_MODE (mode));
14745 gcc_assert (MEM_P (mem));
14746
14747 addr = XEXP (mem, 0);
14748
14749 /* Strip off const from addresses like (const (plus (...))). */
14750 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14751 addr = XEXP (addr, 0);
14752
14753 switch (GET_CODE (addr))
14754 {
14755 case POST_INC:
14756 /* We have to use vldm / vstm for too-large modes. */
14757 if (nregs > 4)
14758 {
14759 templ = "v%smia%%?\t%%0!, %%h1";
14760 ops[0] = XEXP (addr, 0);
14761 }
14762 else
14763 {
14764 templ = "v%s1.64\t%%h1, %%A0";
14765 ops[0] = mem;
14766 }
14767 ops[1] = reg;
14768 break;
14769
14770 case PRE_DEC:
14771 /* We have to use vldm / vstm in this case, since there is no
14772 pre-decrement form of the vld1 / vst1 instructions. */
14773 templ = "v%smdb%%?\t%%0!, %%h1";
14774 ops[0] = XEXP (addr, 0);
14775 ops[1] = reg;
14776 break;
14777
14778 case POST_MODIFY:
14779 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
14780 gcc_unreachable ();
14781
14782 case LABEL_REF:
14783 case PLUS:
14784 {
14785 int i;
14786 int overlap = -1;
14787 for (i = 0; i < nregs; i++)
14788 {
14789 /* We're only using DImode here because it's a convenient size. */
14790 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
14791 ops[1] = adjust_address (mem, DImode, 8 * i);
14792 if (reg_overlap_mentioned_p (ops[0], mem))
14793 {
14794 gcc_assert (overlap == -1);
14795 overlap = i;
14796 }
14797 else
14798 {
14799 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14800 output_asm_insn (buff, ops);
14801 }
14802 }
14803 if (overlap != -1)
14804 {
14805 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
14806 	      ops[1] = adjust_address (mem, DImode, 8 * overlap);
14807 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14808 output_asm_insn (buff, ops);
14809 }
14810
14811 return "";
14812 }
14813
14814 default:
14815 /* We have to use vldm / vstm for too-large modes. */
14816 if (nregs > 4)
14817 templ = "v%smia%%?\t%%m0, %%h1";
14818 else
14819 templ = "v%s1.64\t%%h1, %%A0";
14820
14821 ops[0] = mem;
14822 ops[1] = reg;
14823 }
14824
14825 sprintf (buff, templ, load ? "ld" : "st");
14826 output_asm_insn (buff, ops);
14827
14828 return "";
14829 }
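
/* For example: a post-incremented load of a 16-byte quad-word vector
   (nregs == 2) uses the vld1 form, and the sprintf above yields
   "vld1.64\t%h1, %A0", whereas an XImode load (nregs == 8, too large for
   vld1) falls back to "vldmia%?\t%0!, %h1".  The PLUS/LABEL_REF case instead
   emits one "vldr"/"vstr" per D register, saving any register that overlaps
   the address for last.  */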
14830
14831 /* Compute and return the length of neon_mov<mode>, where <mode> is
14832 one of VSTRUCT modes: EI, OI, CI or XI. */
14833 int
14834 arm_attr_length_move_neon (rtx insn)
14835 {
14836 rtx reg, mem, addr;
14837 int load;
14838 enum machine_mode mode;
14839
14840 extract_insn_cached (insn);
14841
14842 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
14843 {
14844 mode = GET_MODE (recog_data.operand[0]);
14845 switch (mode)
14846 {
14847 case EImode:
14848 case OImode:
14849 return 8;
14850 case CImode:
14851 return 12;
14852 case XImode:
14853 return 16;
14854 default:
14855 gcc_unreachable ();
14856 }
14857 }
14858
14859 load = REG_P (recog_data.operand[0]);
14860 reg = recog_data.operand[!load];
14861 mem = recog_data.operand[load];
14862
14863 gcc_assert (MEM_P (mem));
14864
14865 mode = GET_MODE (reg);
14866 addr = XEXP (mem, 0);
14867
14868 /* Strip off const from addresses like (const (plus (...))). */
14869 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14870 addr = XEXP (addr, 0);
14871
14872 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
14873 {
14874 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14875 return insns * 4;
14876 }
14877 else
14878 return 4;
14879 }
14880
14881 /* Return nonzero if the offset in the address is an immediate. Otherwise,
14882 return zero. */
14883
14884 int
14885 arm_address_offset_is_imm (rtx insn)
14886 {
14887 rtx mem, addr;
14888
14889 extract_insn_cached (insn);
14890
14891 if (REG_P (recog_data.operand[0]))
14892 return 0;
14893
14894 mem = recog_data.operand[0];
14895
14896 gcc_assert (MEM_P (mem));
14897
14898 addr = XEXP (mem, 0);
14899
14900 if (REG_P (addr)
14901 || (GET_CODE (addr) == PLUS
14902 && REG_P (XEXP (addr, 0))
14903 && CONST_INT_P (XEXP (addr, 1))))
14904 return 1;
14905 else
14906 return 0;
14907 }
14908
14909 /* Output an ADD r, s, #n where n may be too big for one instruction.
14910    If N is zero and the destination register is the same as the source,
         output nothing. */
14911 const char *
14912 output_add_immediate (rtx *operands)
14913 {
14914 HOST_WIDE_INT n = INTVAL (operands[2]);
14915
14916 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
14917 {
14918 if (n < 0)
14919 output_multi_immediate (operands,
14920 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
14921 -n);
14922 else
14923 output_multi_immediate (operands,
14924 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
14925 n);
14926 }
14927
14928 return "";
14929 }
14930
14931 /* Output a multiple immediate operation.
14932 OPERANDS is the vector of operands referred to in the output patterns.
14933 INSTR1 is the output pattern to use for the first constant.
14934 INSTR2 is the output pattern to use for subsequent constants.
14935 IMMED_OP is the index of the constant slot in OPERANDS.
14936 N is the constant value. */
14937 static const char *
14938 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
14939 int immed_op, HOST_WIDE_INT n)
14940 {
14941 #if HOST_BITS_PER_WIDE_INT > 32
14942 n &= 0xffffffff;
14943 #endif
14944
14945 if (n == 0)
14946 {
14947 /* Quick and easy output. */
14948 operands[immed_op] = const0_rtx;
14949 output_asm_insn (instr1, operands);
14950 }
14951 else
14952 {
14953 int i;
14954 const char * instr = instr1;
14955
14956 /* Note that n is never zero here (which would give no output). */
14957 for (i = 0; i < 32; i += 2)
14958 {
14959 if (n & (3 << i))
14960 {
14961 operands[immed_op] = GEN_INT (n & (255 << i));
14962 output_asm_insn (instr, operands);
14963 instr = instr2;
14964 i += 6;
14965 }
14966 }
14967 }
14968
14969 return "";
14970 }
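
/* A worked example of the splitting loop above: for N = 0x12345,
   output_add_immediate would emit (schematically)

	add	rD, rS, #0x45
	add	rD, rD, #0x2300
	add	rD, rD, #0x10000

   since each chunk is an 8-bit value at an even bit position and therefore a
   valid ARM immediate.  The stand-alone sketch below (a hypothetical helper,
   not used by the compiler) shows the same chunking outside of GCC.  */
#if 0
#include <stdio.h>

static void
show_immediate_chunks (unsigned int n)
{
  int i;

  for (i = 0; i < 32; i += 2)
    if (n & (3u << i))
      {
	printf ("#0x%x\n", n & (255u << i));
	/* Skip past the 8-bit chunk just taken (the loop adds the final 2).  */
	i += 6;
      }
}
#endif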
14971
14972 /* Return the name of a shifter operation. */
14973 static const char *
14974 arm_shift_nmem(enum rtx_code code)
14975 {
14976 switch (code)
14977 {
14978 case ASHIFT:
14979 return ARM_LSL_NAME;
14980
14981 case ASHIFTRT:
14982 return "asr";
14983
14984 case LSHIFTRT:
14985 return "lsr";
14986
14987 case ROTATERT:
14988 return "ror";
14989
14990 default:
14991 abort();
14992 }
14993 }
14994
14995 /* Return the appropriate ARM instruction for the operation code.
14996 The returned result should not be overwritten. OP is the rtx of the
14997 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
14998 was shifted. */
14999 const char *
15000 arithmetic_instr (rtx op, int shift_first_arg)
15001 {
15002 switch (GET_CODE (op))
15003 {
15004 case PLUS:
15005 return "add";
15006
15007 case MINUS:
15008 return shift_first_arg ? "rsb" : "sub";
15009
15010 case IOR:
15011 return "orr";
15012
15013 case XOR:
15014 return "eor";
15015
15016 case AND:
15017 return "and";
15018
15019 case ASHIFT:
15020 case ASHIFTRT:
15021 case LSHIFTRT:
15022 case ROTATERT:
15023 return arm_shift_nmem(GET_CODE(op));
15024
15025 default:
15026 gcc_unreachable ();
15027 }
15028 }
15029
15030 /* Ensure valid constant shifts and return the appropriate shift mnemonic
15031 for the operation code. The returned result should not be overwritten.
15032 OP is the rtx code of the shift.
15033    On exit, *AMOUNTP will be -1 if the shift is by a register, or the
15034    constant shift amount if the shift is by a constant. */
15035 static const char *
15036 shift_op (rtx op, HOST_WIDE_INT *amountp)
15037 {
15038 const char * mnem;
15039 enum rtx_code code = GET_CODE (op);
15040
15041 switch (GET_CODE (XEXP (op, 1)))
15042 {
15043 case REG:
15044 case SUBREG:
15045 *amountp = -1;
15046 break;
15047
15048 case CONST_INT:
15049 *amountp = INTVAL (XEXP (op, 1));
15050 break;
15051
15052 default:
15053 gcc_unreachable ();
15054 }
15055
15056 switch (code)
15057 {
15058 case ROTATE:
15059 gcc_assert (*amountp != -1);
15060 *amountp = 32 - *amountp;
15061 code = ROTATERT;
15062
15063 /* Fall through. */
15064
15065 case ASHIFT:
15066 case ASHIFTRT:
15067 case LSHIFTRT:
15068 case ROTATERT:
15069 mnem = arm_shift_nmem(code);
15070 break;
15071
15072 case MULT:
15073 /* We never have to worry about the amount being other than a
15074 power of 2, since this case can never be reloaded from a reg. */
15075 gcc_assert (*amountp != -1);
15076 *amountp = int_log2 (*amountp);
15077 return ARM_LSL_NAME;
15078
15079 default:
15080 gcc_unreachable ();
15081 }
15082
15083 if (*amountp != -1)
15084 {
15085 /* This is not 100% correct, but follows from the desire to merge
15086 multiplication by a power of 2 with the recognizer for a
15087 shift. >=32 is not a valid shift for "lsl", so we must try and
15088 output a shift that produces the correct arithmetical result.
15089 Using lsr #32 is identical except for the fact that the carry bit
15090 is not set correctly if we set the flags; but we never use the
15091 carry bit from such an operation, so we can ignore that. */
15092 if (code == ROTATERT)
15093 /* Rotate is just modulo 32. */
15094 *amountp &= 31;
15095 else if (*amountp != (*amountp & 31))
15096 {
15097 if (code == ASHIFT)
15098 mnem = "lsr";
15099 *amountp = 32;
15100 }
15101
15102 /* Shifts of 0 are no-ops. */
15103 if (*amountp == 0)
15104 return NULL;
15105 }
15106
15107 return mnem;
15108 }
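
/* Some illustrative results of the normalisation above (operand rtx omitted
   for brevity):

     (mult x 8)         -> "lsl", *amountp == 3
     (ashiftrt x (reg)) -> "asr", *amountp == -1
     (rotate x 5)       -> "ror", *amountp == 27   (converted to rotatert)
     (ashift x 34)      -> "lsr", *amountp == 32   (out-of-range lsl)
     (lshiftrt x 0)     -> NULL                    (shift by zero is a no-op)  */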
15109
15110 /* Return the shift count (log2) corresponding to POWER, which must be a
         power of two. */
15111
15112 static HOST_WIDE_INT
15113 int_log2 (HOST_WIDE_INT power)
15114 {
15115 HOST_WIDE_INT shift = 0;
15116
15117 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
15118 {
15119 gcc_assert (shift <= 31);
15120 shift++;
15121 }
15122
15123 return shift;
15124 }
15125
15126 /* Output a .ascii pseudo-op, keeping track of lengths. This is
15127 because /bin/as is horribly restrictive. The judgement about
15128 whether or not each character is 'printable' (and can be output as
15129 is) or not (and must be printed with an octal escape) must be made
15130 with reference to the *host* character set -- the situation is
15131 similar to that discussed in the comments above pp_c_char in
15132 c-pretty-print.c. */
15133
15134 #define MAX_ASCII_LEN 51
15135
15136 void
15137 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
15138 {
15139 int i;
15140 int len_so_far = 0;
15141
15142 fputs ("\t.ascii\t\"", stream);
15143
15144 for (i = 0; i < len; i++)
15145 {
15146 int c = p[i];
15147
15148 if (len_so_far >= MAX_ASCII_LEN)
15149 {
15150 fputs ("\"\n\t.ascii\t\"", stream);
15151 len_so_far = 0;
15152 }
15153
15154 if (ISPRINT (c))
15155 {
15156 if (c == '\\' || c == '\"')
15157 {
15158 putc ('\\', stream);
15159 len_so_far++;
15160 }
15161 putc (c, stream);
15162 len_so_far++;
15163 }
15164 else
15165 {
15166 fprintf (stream, "\\%03o", c);
15167 len_so_far += 4;
15168 }
15169 }
15170
15171 fputs ("\"\n", stream);
15172 }
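
/* For instance, the four input bytes  h i " \n  are emitted as

	.ascii	"hi\"\012"

   with the quote escaped by a backslash, the non-printable newline written as
   a three-digit octal escape, and a fresh ".ascii" directive started whenever
   the current line reaches MAX_ASCII_LEN characters.  */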
15173 \f
15174 /* Compute the register save mask for registers 0 through 12
15175 inclusive. This code is used by arm_compute_save_reg_mask. */
15176
15177 static unsigned long
15178 arm_compute_save_reg0_reg12_mask (void)
15179 {
15180 unsigned long func_type = arm_current_func_type ();
15181 unsigned long save_reg_mask = 0;
15182 unsigned int reg;
15183
15184 if (IS_INTERRUPT (func_type))
15185 {
15186 unsigned int max_reg;
15187 /* Interrupt functions must not corrupt any registers,
15188 even call clobbered ones. If this is a leaf function
15189 we can just examine the registers used by the RTL, but
15190 otherwise we have to assume that whatever function is
15191 called might clobber anything, and so we have to save
15192 all the call-clobbered registers as well. */
15193 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
15194 /* FIQ handlers have registers r8 - r12 banked, so
15195 	 we only need to check r0 - r7.  Normal ISRs only
15196 	 bank r13 and r14, so we must check up to r12.
15197 r13 is the stack pointer which is always preserved,
15198 so we do not need to consider it here. */
15199 max_reg = 7;
15200 else
15201 max_reg = 12;
15202
15203 for (reg = 0; reg <= max_reg; reg++)
15204 if (df_regs_ever_live_p (reg)
15205 || (! crtl->is_leaf && call_used_regs[reg]))
15206 save_reg_mask |= (1 << reg);
15207
15208 /* Also save the pic base register if necessary. */
15209 if (flag_pic
15210 && !TARGET_SINGLE_PIC_BASE
15211 && arm_pic_register != INVALID_REGNUM
15212 && crtl->uses_pic_offset_table)
15213 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15214 }
15215 else if (IS_VOLATILE(func_type))
15216 {
15217 /* For noreturn functions we historically omitted register saves
15218 altogether. However this really messes up debugging. As a
15219 compromise save just the frame pointers. Combined with the link
15220 register saved elsewhere this should be sufficient to get
15221 a backtrace. */
15222 if (frame_pointer_needed)
15223 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15224 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
15225 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15226 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
15227 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
15228 }
15229 else
15230 {
15231 /* In the normal case we only need to save those registers
15232 which are call saved and which are used by this function. */
15233 for (reg = 0; reg <= 11; reg++)
15234 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15235 save_reg_mask |= (1 << reg);
15236
15237 /* Handle the frame pointer as a special case. */
15238 if (frame_pointer_needed)
15239 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15240
15241 /* If we aren't loading the PIC register,
15242 don't stack it even though it may be live. */
15243 if (flag_pic
15244 && !TARGET_SINGLE_PIC_BASE
15245 && arm_pic_register != INVALID_REGNUM
15246 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
15247 || crtl->uses_pic_offset_table))
15248 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15249
15250 /* The prologue will copy SP into R0, so save it. */
15251 if (IS_STACKALIGN (func_type))
15252 save_reg_mask |= 1;
15253 }
15254
15255 /* Save registers so the exception handler can modify them. */
15256 if (crtl->calls_eh_return)
15257 {
15258 unsigned int i;
15259
15260 for (i = 0; ; i++)
15261 {
15262 reg = EH_RETURN_DATA_REGNO (i);
15263 if (reg == INVALID_REGNUM)
15264 break;
15265 save_reg_mask |= 1 << reg;
15266 }
15267 }
15268
15269 return save_reg_mask;
15270 }
15271
15272
15273 /* Compute the number of bytes used to store the static chain register on the
15274 stack, above the stack frame. We need to know this accurately to get the
15275 alignment of the rest of the stack frame correct. */
15276
15277 static int arm_compute_static_chain_stack_bytes (void)
15278 {
15279 unsigned long func_type = arm_current_func_type ();
15280 int static_chain_stack_bytes = 0;
15281
15282 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
15283 IS_NESTED (func_type) &&
15284 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
15285 static_chain_stack_bytes = 4;
15286
15287 return static_chain_stack_bytes;
15288 }
15289
15290
15291 /* Compute a bit mask of which registers need to be
15292 saved on the stack for the current function.
15293 This is used by arm_get_frame_offsets, which may add extra registers. */
15294
15295 static unsigned long
15296 arm_compute_save_reg_mask (void)
15297 {
15298 unsigned int save_reg_mask = 0;
15299 unsigned long func_type = arm_current_func_type ();
15300 unsigned int reg;
15301
15302 if (IS_NAKED (func_type))
15303 /* This should never really happen. */
15304 return 0;
15305
15306 /* If we are creating a stack frame, then we must save the frame pointer,
15307 IP (which will hold the old stack pointer), LR and the PC. */
15308 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15309 save_reg_mask |=
15310 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
15311 | (1 << IP_REGNUM)
15312 | (1 << LR_REGNUM)
15313 | (1 << PC_REGNUM);
15314
15315 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
15316
15317 /* Decide if we need to save the link register.
15318 Interrupt routines have their own banked link register,
15319 so they never need to save it.
15320 Otherwise if we do not use the link register we do not need to save
15321 it. If we are pushing other registers onto the stack however, we
15322 can save an instruction in the epilogue by pushing the link register
15323 now and then popping it back into the PC. This incurs extra memory
15324 accesses though, so we only do it when optimizing for size, and only
15325 if we know that we will not need a fancy return sequence. */
15326 if (df_regs_ever_live_p (LR_REGNUM)
15327 || (save_reg_mask
15328 && optimize_size
15329 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15330 && !crtl->calls_eh_return))
15331 save_reg_mask |= 1 << LR_REGNUM;
15332
15333 if (cfun->machine->lr_save_eliminated)
15334 save_reg_mask &= ~ (1 << LR_REGNUM);
15335
15336 if (TARGET_REALLY_IWMMXT
15337 && ((bit_count (save_reg_mask)
15338 + ARM_NUM_INTS (crtl->args.pretend_args_size +
15339 arm_compute_static_chain_stack_bytes())
15340 ) % 2) != 0)
15341 {
15342 /* The total number of registers that are going to be pushed
15343 onto the stack is odd. We need to ensure that the stack
15344 is 64-bit aligned before we start to save iWMMXt registers,
15345 and also before we start to create locals. (A local variable
15346 might be a double or long long which we will load/store using
15347 an iWMMXt instruction). Therefore we need to push another
15348 ARM register, so that the stack will be 64-bit aligned. We
15349 	 try to avoid using the arg registers (r0 - r3) as they might be
15350 used to pass values in a tail call. */
15351 for (reg = 4; reg <= 12; reg++)
15352 if ((save_reg_mask & (1 << reg)) == 0)
15353 break;
15354
15355 if (reg <= 12)
15356 save_reg_mask |= (1 << reg);
15357 else
15358 {
15359 cfun->machine->sibcall_blocked = 1;
15360 save_reg_mask |= (1 << 3);
15361 }
15362 }
15363
15364 /* We may need to push an additional register for use initializing the
15365 PIC base register. */
15366 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
15367 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
15368 {
15369 reg = thumb_find_work_register (1 << 4);
15370 if (!call_used_regs[reg])
15371 save_reg_mask |= (1 << reg);
15372 }
15373
15374 return save_reg_mask;
15375 }
15376
15377
15378 /* Compute a bit mask of which registers need to be
15379 saved on the stack for the current function. */
15380 static unsigned long
15381 thumb1_compute_save_reg_mask (void)
15382 {
15383 unsigned long mask;
15384 unsigned reg;
15385
15386 mask = 0;
15387 for (reg = 0; reg < 12; reg ++)
15388 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15389 mask |= 1 << reg;
15390
15391 if (flag_pic
15392 && !TARGET_SINGLE_PIC_BASE
15393 && arm_pic_register != INVALID_REGNUM
15394 && crtl->uses_pic_offset_table)
15395 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15396
15397 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
15398 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
15399 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15400
15401 /* LR will also be pushed if any lo regs are pushed. */
15402 if (mask & 0xff || thumb_force_lr_save ())
15403 mask |= (1 << LR_REGNUM);
15404
15405 /* Make sure we have a low work register if we need one.
15406 We will need one if we are going to push a high register,
15407 but we are not currently intending to push a low register. */
15408 if ((mask & 0xff) == 0
15409 && ((mask & 0x0f00) || TARGET_BACKTRACE))
15410 {
15411 /* Use thumb_find_work_register to choose which register
15412 we will use. If the register is live then we will
15413 have to push it. Use LAST_LO_REGNUM as our fallback
15414 choice for the register to select. */
15415 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
15416 /* Make sure the register returned by thumb_find_work_register is
15417 not part of the return value. */
15418 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
15419 reg = LAST_LO_REGNUM;
15420
15421 if (! call_used_regs[reg])
15422 mask |= 1 << reg;
15423 }
15424
15425 /* The 504 below is 8 bytes less than 512 because there are two possible
15426 alignment words. We can't tell here if they will be present or not so we
15427 have to play it safe and assume that they are. */
15428 if ((CALLER_INTERWORKING_SLOT_SIZE +
15429 ROUND_UP_WORD (get_frame_size ()) +
15430 crtl->outgoing_args_size) >= 504)
15431 {
15432 /* This is the same as the code in thumb1_expand_prologue() which
15433 determines which register to use for stack decrement. */
15434 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
15435 if (mask & (1 << reg))
15436 break;
15437
15438 if (reg > LAST_LO_REGNUM)
15439 {
15440 /* Make sure we have a register available for stack decrement. */
15441 mask |= 1 << LAST_LO_REGNUM;
15442 }
15443 }
15444
15445 return mask;
15446 }
15447
15448
15449 /* Return the number of bytes required to save VFP registers. */
15450 static int
15451 arm_get_vfp_saved_size (void)
15452 {
15453 unsigned int regno;
15454 int count;
15455 int saved;
15456
15457 saved = 0;
15458 /* Space for saved VFP registers. */
15459 if (TARGET_HARD_FLOAT && TARGET_VFP)
15460 {
15461 count = 0;
15462 for (regno = FIRST_VFP_REGNUM;
15463 regno < LAST_VFP_REGNUM;
15464 regno += 2)
15465 {
15466 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
15467 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
15468 {
15469 if (count > 0)
15470 {
15471 /* Workaround ARM10 VFPr1 bug. */
15472 if (count == 2 && !arm_arch6)
15473 count++;
15474 saved += count * 8;
15475 }
15476 count = 0;
15477 }
15478 else
15479 count++;
15480 }
15481 if (count > 0)
15482 {
15483 if (count == 2 && !arm_arch6)
15484 count++;
15485 saved += count * 8;
15486 }
15487 }
15488 return saved;
15489 }
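
/* Example: if d8, d9 and d11 are live and call-saved, the loop above sees a
   run of two D registers followed by a run of one, giving 3 * 8 = 24 bytes
   on an ARMv6+ core.  On a pre-v6 ARM10 the two-register run is padded to
   three because of the VFPr1 erratum, giving 32 bytes instead.  */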
15490
15491
15492 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
15493 everything bar the final return instruction. If simple_return is true,
15494    everything bar the final return instruction.  If SIMPLE_RETURN is true,
15495    then do not output the epilogue, because it has already been emitted in RTL. */
15496 output_return_instruction (rtx operand, bool really_return, bool reverse,
15497 bool simple_return)
15498 {
15499 char conditional[10];
15500 char instr[100];
15501 unsigned reg;
15502 unsigned long live_regs_mask;
15503 unsigned long func_type;
15504 arm_stack_offsets *offsets;
15505
15506 func_type = arm_current_func_type ();
15507
15508 if (IS_NAKED (func_type))
15509 return "";
15510
15511 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15512 {
15513 /* If this function was declared non-returning, and we have
15514 found a tail call, then we have to trust that the called
15515 function won't return. */
15516 if (really_return)
15517 {
15518 rtx ops[2];
15519
15520 /* Otherwise, trap an attempted return by aborting. */
15521 ops[0] = operand;
15522 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
15523 : "abort");
15524 assemble_external_libcall (ops[1]);
15525 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
15526 }
15527
15528 return "";
15529 }
15530
15531 gcc_assert (!cfun->calls_alloca || really_return);
15532
15533 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
15534
15535 cfun->machine->return_used_this_function = 1;
15536
15537 offsets = arm_get_frame_offsets ();
15538 live_regs_mask = offsets->saved_regs_mask;
15539
15540 if (!simple_return && live_regs_mask)
15541 {
15542 const char * return_reg;
15543
15544 /* If we do not have any special requirements for function exit
15545 (e.g. interworking) then we can load the return address
15546 directly into the PC. Otherwise we must load it into LR. */
15547 if (really_return
15548 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
15549 return_reg = reg_names[PC_REGNUM];
15550 else
15551 return_reg = reg_names[LR_REGNUM];
15552
15553 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
15554 {
15555 /* There are three possible reasons for the IP register
15556 	     being saved: 1) a stack frame was created, in which case
15557 IP contains the old stack pointer, or 2) an ISR routine
15558 corrupted it, or 3) it was saved to align the stack on
15559 iWMMXt. In case 1, restore IP into SP, otherwise just
15560 restore IP. */
15561 if (frame_pointer_needed)
15562 {
15563 live_regs_mask &= ~ (1 << IP_REGNUM);
15564 live_regs_mask |= (1 << SP_REGNUM);
15565 }
15566 else
15567 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
15568 }
15569
15570 /* On some ARM architectures it is faster to use LDR rather than
15571 LDM to load a single register. On other architectures, the
15572 cost is the same. In 26 bit mode, or for exception handlers,
15573 we have to use LDM to load the PC so that the CPSR is also
15574 restored. */
15575 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15576 if (live_regs_mask == (1U << reg))
15577 break;
15578
15579 if (reg <= LAST_ARM_REGNUM
15580 && (reg != LR_REGNUM
15581 || ! really_return
15582 || ! IS_INTERRUPT (func_type)))
15583 {
15584 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
15585 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
15586 }
15587 else
15588 {
15589 char *p;
15590 int first = 1;
15591
15592 /* Generate the load multiple instruction to restore the
15593 registers. Note we can get here, even if
15594 frame_pointer_needed is true, but only if sp already
15595 points to the base of the saved core registers. */
15596 if (live_regs_mask & (1 << SP_REGNUM))
15597 {
15598 unsigned HOST_WIDE_INT stack_adjust;
15599
15600 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
15601 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
15602
15603 if (stack_adjust && arm_arch5 && TARGET_ARM)
15604 if (TARGET_UNIFIED_ASM)
15605 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
15606 else
15607 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
15608 else
15609 {
15610 /* If we can't use ldmib (SA110 bug),
15611 then try to pop r3 instead. */
15612 if (stack_adjust)
15613 live_regs_mask |= 1 << 3;
15614
15615 if (TARGET_UNIFIED_ASM)
15616 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
15617 else
15618 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
15619 }
15620 }
15621 else
15622 if (TARGET_UNIFIED_ASM)
15623 sprintf (instr, "pop%s\t{", conditional);
15624 else
15625 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
15626
15627 p = instr + strlen (instr);
15628
15629 for (reg = 0; reg <= SP_REGNUM; reg++)
15630 if (live_regs_mask & (1 << reg))
15631 {
15632 int l = strlen (reg_names[reg]);
15633
15634 if (first)
15635 first = 0;
15636 else
15637 {
15638 memcpy (p, ", ", 2);
15639 p += 2;
15640 }
15641
15642 memcpy (p, "%|", 2);
15643 memcpy (p + 2, reg_names[reg], l);
15644 p += l + 2;
15645 }
15646
15647 if (live_regs_mask & (1 << LR_REGNUM))
15648 {
15649 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
15650 /* If returning from an interrupt, restore the CPSR. */
15651 if (IS_INTERRUPT (func_type))
15652 strcat (p, "^");
15653 }
15654 else
15655 strcpy (p, "}");
15656 }
15657
15658 output_asm_insn (instr, & operand);
15659
15660 /* See if we need to generate an extra instruction to
15661 perform the actual function return. */
15662 if (really_return
15663 && func_type != ARM_FT_INTERWORKED
15664 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
15665 {
15666 /* The return has already been handled
15667 by loading the LR into the PC. */
15668 return "";
15669 }
15670 }
15671
15672 if (really_return)
15673 {
15674 switch ((int) ARM_FUNC_TYPE (func_type))
15675 {
15676 case ARM_FT_ISR:
15677 case ARM_FT_FIQ:
15678 /* ??? This is wrong for unified assembly syntax. */
15679 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
15680 break;
15681
15682 case ARM_FT_INTERWORKED:
15683 sprintf (instr, "bx%s\t%%|lr", conditional);
15684 break;
15685
15686 case ARM_FT_EXCEPTION:
15687 /* ??? This is wrong for unified assembly syntax. */
15688 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
15689 break;
15690
15691 default:
15692 /* Use bx if it's available. */
15693 if (arm_arch5 || arm_arch4t)
15694 sprintf (instr, "bx%s\t%%|lr", conditional);
15695 else
15696 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
15697 break;
15698 }
15699
15700 output_asm_insn (instr, & operand);
15701 }
15702
15703 return "";
15704 }
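
/* Two illustrative (unified-syntax) sequences this function can produce: for
   a function that pushed {r4, r5, lr} and needs no interworking, the whole
   return collapses into

	pop	{r4, r5, pc}

   while a leaf function with nothing on the stack simply gets

	bx	lr

   (or "mov pc, lr" on cores without BX).  The exact spelling also depends on
   TARGET_UNIFIED_ASM and on any condition code folded into the return.  */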
15705
15706 /* Write the function name into the code section, directly preceding
15707 the function prologue.
15708
15709 Code will be output similar to this:
15710 t0
15711 .ascii "arm_poke_function_name", 0
15712 .align
15713 t1
15714 .word 0xff000000 + (t1 - t0)
15715 arm_poke_function_name
15716 mov ip, sp
15717 stmfd sp!, {fp, ip, lr, pc}
15718 sub fp, ip, #4
15719
15720 When performing a stack backtrace, code can inspect the value
15721 of 'pc' stored at 'fp' + 0. If the trace function then looks
15722 at location pc - 12 and the top 8 bits are set, then we know
15723    that there is a function name embedded immediately preceding this
15724    location, whose length is ((pc[-3]) & ~0xff000000).
15725
15726 We assume that pc is declared as a pointer to an unsigned long.
15727
15728 It is of no benefit to output the function name if we are assembling
15729 a leaf function. These function types will not contain a stack
15730 backtrace structure, therefore it is not possible to determine the
15731 function name. */
15732 void
15733 arm_poke_function_name (FILE *stream, const char *name)
15734 {
15735 unsigned long alignlength;
15736 unsigned long length;
15737 rtx x;
15738
15739 length = strlen (name) + 1;
15740 alignlength = ROUND_UP_WORD (length);
15741
15742 ASM_OUTPUT_ASCII (stream, name, length);
15743 ASM_OUTPUT_ALIGN (stream, 2);
15744 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
15745 assemble_aligned_integer (UNITS_PER_WORD, x);
15746 }
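
/* A minimal, hypothetical sketch (not used anywhere in GCC) of how a
   backtracer could recover the poked name, given the saved pc value found at
   [fp]; the function name and parameter below are invented for the example.  */
#if 0
static const char *
find_poked_function_name (const unsigned long *saved_pc)
{
  unsigned long marker = saved_pc[-3];	/* The word at pc - 12.  */

  if ((marker & 0xff000000) != 0xff000000)
    return 0;				/* No name was embedded.  */

  /* The low 24 bits hold the padded length of the string that sits
     immediately before the marker word.  */
  return (const char *) &saved_pc[-3] - (marker & 0x00ffffff);
}
#endif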
15747
15748 /* Place some comments into the assembler stream
15749 describing the current function. */
15750 static void
15751 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
15752 {
15753 unsigned long func_type;
15754
15755 /* ??? Do we want to print some of the below anyway? */
15756 if (TARGET_THUMB1)
15757 return;
15758
15759 /* Sanity check. */
15760 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
15761
15762 func_type = arm_current_func_type ();
15763
15764 switch ((int) ARM_FUNC_TYPE (func_type))
15765 {
15766 default:
15767 case ARM_FT_NORMAL:
15768 break;
15769 case ARM_FT_INTERWORKED:
15770 asm_fprintf (f, "\t%@ Function supports interworking.\n");
15771 break;
15772 case ARM_FT_ISR:
15773 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
15774 break;
15775 case ARM_FT_FIQ:
15776 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
15777 break;
15778 case ARM_FT_EXCEPTION:
15779 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
15780 break;
15781 }
15782
15783 if (IS_NAKED (func_type))
15784 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
15785
15786 if (IS_VOLATILE (func_type))
15787 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
15788
15789 if (IS_NESTED (func_type))
15790 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
15791 if (IS_STACKALIGN (func_type))
15792 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
15793
15794 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
15795 crtl->args.size,
15796 crtl->args.pretend_args_size, frame_size);
15797
15798 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
15799 frame_pointer_needed,
15800 cfun->machine->uses_anonymous_args);
15801
15802 if (cfun->machine->lr_save_eliminated)
15803 asm_fprintf (f, "\t%@ link register save eliminated.\n");
15804
15805 if (crtl->calls_eh_return)
15806 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
15807
15808 }
15809
15810 static void
15811 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
15812 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
15813 {
15814 arm_stack_offsets *offsets;
15815
15816 if (TARGET_THUMB1)
15817 {
15818 int regno;
15819
15820 /* Emit any call-via-reg trampolines that are needed for v4t support
15821 of call_reg and call_value_reg type insns. */
15822 for (regno = 0; regno < LR_REGNUM; regno++)
15823 {
15824 rtx label = cfun->machine->call_via[regno];
15825
15826 if (label != NULL)
15827 {
15828 switch_to_section (function_section (current_function_decl));
15829 targetm.asm_out.internal_label (asm_out_file, "L",
15830 CODE_LABEL_NUMBER (label));
15831 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
15832 }
15833 }
15834
15835 /* ??? Probably not safe to set this here, since it assumes that a
15836 function will be emitted as assembly immediately after we generate
15837 RTL for it. This does not happen for inline functions. */
15838 cfun->machine->return_used_this_function = 0;
15839 }
15840 else /* TARGET_32BIT */
15841 {
15842 /* We need to take into account any stack-frame rounding. */
15843 offsets = arm_get_frame_offsets ();
15844
15845 gcc_assert (!use_return_insn (FALSE, NULL)
15846 || (cfun->machine->return_used_this_function != 0)
15847 || offsets->saved_regs == offsets->outgoing_args
15848 || frame_pointer_needed);
15849
15850 /* Reset the ARM-specific per-function variables. */
15851 after_arm_reorg = 0;
15852 }
15853 }
15854
15855 /* Generate and emit an insn that we will recognize as a push_multi.
15856 Unfortunately, since this insn does not reflect very well the actual
15857 semantics of the operation, we need to annotate the insn for the benefit
15858 of DWARF2 frame unwind information. */
15859 static rtx
15860 emit_multi_reg_push (unsigned long mask)
15861 {
15862 int num_regs = 0;
15863 int num_dwarf_regs;
15864 int i, j;
15865 rtx par;
15866 rtx dwarf;
15867 int dwarf_par_index;
15868 rtx tmp, reg;
15869
15870 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15871 if (mask & (1 << i))
15872 num_regs++;
15873
15874 gcc_assert (num_regs && num_regs <= 16);
15875
15876 /* We don't record the PC in the dwarf frame information. */
15877 num_dwarf_regs = num_regs;
15878 if (mask & (1 << PC_REGNUM))
15879 num_dwarf_regs--;
15880
15881 /* For the body of the insn we are going to generate an UNSPEC in
15882 parallel with several USEs. This allows the insn to be recognized
15883 by the push_multi pattern in the arm.md file.
15884
15885 The body of the insn looks something like this:
15886
15887 (parallel [
15888 (set (mem:BLK (pre_modify:SI (reg:SI sp)
15889 (const_int:SI <num>)))
15890 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
15891 (use (reg:SI XX))
15892 (use (reg:SI YY))
15893 ...
15894 ])
15895
15896 For the frame note however, we try to be more explicit and actually
15897 show each register being stored into the stack frame, plus a (single)
15898 decrement of the stack pointer. We do it this way in order to be
15899 friendly to the stack unwinding code, which only wants to see a single
15900 stack decrement per instruction. The RTL we generate for the note looks
15901 something like this:
15902
15903 (sequence [
15904 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
15905 (set (mem:SI (reg:SI sp)) (reg:SI r4))
15906 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
15907 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
15908 ...
15909 ])
15910
15911    FIXME: In an ideal world the PRE_MODIFY would not exist and
15912 instead we'd have a parallel expression detailing all
15913 the stores to the various memory addresses so that debug
15914 information is more up-to-date. Remember however while writing
15915 this to take care of the constraints with the push instruction.
15916
15917 Note also that this has to be taken care of for the VFP registers.
15918
15919 For more see PR43399. */
15920
15921 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
15922 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
15923 dwarf_par_index = 1;
15924
15925 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15926 {
15927 if (mask & (1 << i))
15928 {
15929 reg = gen_rtx_REG (SImode, i);
15930
15931 XVECEXP (par, 0, 0)
15932 = gen_rtx_SET (VOIDmode,
15933 gen_frame_mem
15934 (BLKmode,
15935 gen_rtx_PRE_MODIFY (Pmode,
15936 stack_pointer_rtx,
15937 plus_constant
15938 (Pmode, stack_pointer_rtx,
15939 -4 * num_regs))
15940 ),
15941 gen_rtx_UNSPEC (BLKmode,
15942 gen_rtvec (1, reg),
15943 UNSPEC_PUSH_MULT));
15944
15945 if (i != PC_REGNUM)
15946 {
15947 tmp = gen_rtx_SET (VOIDmode,
15948 gen_frame_mem (SImode, stack_pointer_rtx),
15949 reg);
15950 RTX_FRAME_RELATED_P (tmp) = 1;
15951 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
15952 dwarf_par_index++;
15953 }
15954
15955 break;
15956 }
15957 }
15958
15959 for (j = 1, i++; j < num_regs; i++)
15960 {
15961 if (mask & (1 << i))
15962 {
15963 reg = gen_rtx_REG (SImode, i);
15964
15965 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15966
15967 if (i != PC_REGNUM)
15968 {
15969 tmp
15970 = gen_rtx_SET (VOIDmode,
15971 gen_frame_mem
15972 (SImode,
15973 plus_constant (Pmode, stack_pointer_rtx,
15974 4 * j)),
15975 reg);
15976 RTX_FRAME_RELATED_P (tmp) = 1;
15977 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15978 }
15979
15980 j++;
15981 }
15982 }
15983
15984 par = emit_insn (par);
15985
15986 tmp = gen_rtx_SET (VOIDmode,
15987 stack_pointer_rtx,
15988 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
15989 RTX_FRAME_RELATED_P (tmp) = 1;
15990 XVECEXP (dwarf, 0, 0) = tmp;
15991
15992 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15993
15994 return par;
15995 }
15996
15997 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
15998 SAVED_REGS_MASK shows which registers need to be restored.
15999
16000 Unfortunately, since this insn does not reflect very well the actual
16001 semantics of the operation, we need to annotate the insn for the benefit
16002 of DWARF2 frame unwind information. */
16003 static void
16004 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
16005 {
16006 int num_regs = 0;
16007 int i, j;
16008 rtx par;
16009 rtx dwarf = NULL_RTX;
16010 rtx tmp, reg;
16011 bool return_in_pc;
16012 int offset_adj;
16013 int emit_update;
16014
16015 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
16016 offset_adj = return_in_pc ? 1 : 0;
16017 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16018 if (saved_regs_mask & (1 << i))
16019 num_regs++;
16020
16021 gcc_assert (num_regs && num_regs <= 16);
16022
16023   /* If SP is in the reglist, then we don't emit an SP update insn. */
16024 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
16025
16026 /* The parallel needs to hold num_regs SETs
16027 and one SET for the stack update. */
16028 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
16029
16030 if (return_in_pc)
16031 {
16032 tmp = ret_rtx;
16033 XVECEXP (par, 0, 0) = tmp;
16034 }
16035
16036 if (emit_update)
16037 {
16038 /* Increment the stack pointer, based on there being
16039 num_regs 4-byte registers to restore. */
16040 tmp = gen_rtx_SET (VOIDmode,
16041 stack_pointer_rtx,
16042 plus_constant (Pmode,
16043 stack_pointer_rtx,
16044 4 * num_regs));
16045 RTX_FRAME_RELATED_P (tmp) = 1;
16046 XVECEXP (par, 0, offset_adj) = tmp;
16047 }
16048
16049 /* Now restore every reg, which may include PC. */
16050 for (j = 0, i = 0; j < num_regs; i++)
16051 if (saved_regs_mask & (1 << i))
16052 {
16053 reg = gen_rtx_REG (SImode, i);
16054 tmp = gen_rtx_SET (VOIDmode,
16055 reg,
16056 gen_frame_mem
16057 (SImode,
16058 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
16059 RTX_FRAME_RELATED_P (tmp) = 1;
16060 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
16061
16062 /* We need to maintain a sequence for DWARF info too. As dwarf info
16063 should not have PC, skip PC. */
16064 if (i != PC_REGNUM)
16065 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
16066
16067 j++;
16068 }
16069
16070 if (return_in_pc)
16071 par = emit_jump_insn (par);
16072 else
16073 par = emit_insn (par);
16074
16075 REG_NOTES (par) = dwarf;
16076 }
16077
16078 /* Generate and emit an insn pattern that we will recognize as a pop_multi
16079 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
16080
16081 Unfortunately, since this insn does not reflect very well the actual
16082 semantics of the operation, we need to annotate the insn for the benefit
16083 of DWARF2 frame unwind information. */
16084 static void
16085 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
16086 {
16087 int i, j;
16088 rtx par;
16089 rtx dwarf = NULL_RTX;
16090 rtx tmp, reg;
16091
16092 gcc_assert (num_regs && num_regs <= 32);
16093
16094 /* Workaround ARM10 VFPr1 bug. */
16095 if (num_regs == 2 && !arm_arch6)
16096 {
16097 if (first_reg == 15)
16098 first_reg--;
16099
16100 num_regs++;
16101 }
16102
16103 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
16104 there could be up to 32 D-registers to restore.
16105 If there are more than 16 D-registers, make two recursive calls,
16106 each of which emits one pop_multi instruction. */
16107 if (num_regs > 16)
16108 {
16109 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
16110 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
16111 return;
16112 }
16113
16114 /* The parallel needs to hold num_regs SETs
16115 and one SET for the stack update. */
16116 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
16117
16118 /* Increment the stack pointer, based on there being
16119 num_regs 8-byte registers to restore. */
16120 tmp = gen_rtx_SET (VOIDmode,
16121 base_reg,
16122 plus_constant (Pmode, base_reg, 8 * num_regs));
16123 RTX_FRAME_RELATED_P (tmp) = 1;
16124 XVECEXP (par, 0, 0) = tmp;
16125
16126 /* Now show every reg that will be restored, using a SET for each. */
16127 for (j = 0, i=first_reg; j < num_regs; i += 2)
16128 {
16129 reg = gen_rtx_REG (DFmode, i);
16130
16131 tmp = gen_rtx_SET (VOIDmode,
16132 reg,
16133 gen_frame_mem
16134 (DFmode,
16135 plus_constant (Pmode, base_reg, 8 * j)));
16136 RTX_FRAME_RELATED_P (tmp) = 1;
16137 XVECEXP (par, 0, j + 1) = tmp;
16138
16139 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
16140
16141 j++;
16142 }
16143
16144 par = emit_insn (par);
16145 REG_NOTES (par) = dwarf;
16146 }
16147
16148 /* Calculate the size of the return value that is passed in registers. */
16149 static unsigned
16150 arm_size_return_regs (void)
16151 {
16152 enum machine_mode mode;
16153
16154 if (crtl->return_rtx != 0)
16155 mode = GET_MODE (crtl->return_rtx);
16156 else
16157 mode = DECL_MODE (DECL_RESULT (current_function_decl));
16158
16159 return GET_MODE_SIZE (mode);
16160 }
16161
16162 /* Return true if the current function needs to save/restore LR. */
16163 static bool
16164 thumb_force_lr_save (void)
16165 {
16166 return !cfun->machine->lr_save_eliminated
16167 && (!leaf_function_p ()
16168 || thumb_far_jump_used_p ()
16169 || df_regs_ever_live_p (LR_REGNUM));
16170 }
16171
16172
16173 /* Return true if r3 is used by any of the tail call insns in the
16174 current function. */
16175 static bool
16176 any_sibcall_uses_r3 (void)
16177 {
16178 edge_iterator ei;
16179 edge e;
16180
16181 if (!crtl->tail_call_emit)
16182 return false;
16183 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
16184 if (e->flags & EDGE_SIBCALL)
16185 {
16186 rtx call = BB_END (e->src);
16187 if (!CALL_P (call))
16188 call = prev_nonnote_nondebug_insn (call);
16189 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
16190 if (find_regno_fusage (call, USE, 3))
16191 return true;
16192 }
16193 return false;
16194 }
16195
16196
16197 /* Compute the distance from register FROM to register TO.
16198 These can be the arg pointer (26), the soft frame pointer (25),
16199 the stack pointer (13) or the hard frame pointer (11).
16200 In thumb mode r7 is used as the soft frame pointer, if needed.
16201 Typical stack layout looks like this:
16202
16203 old stack pointer -> | |
16204 ----
16205 | | \
16206 | | saved arguments for
16207 | | vararg functions
16208 | | /
16209 --
16210 hard FP & arg pointer -> | | \
16211 | | stack
16212 | | frame
16213 | | /
16214 --
16215 | | \
16216 | | call saved
16217 | | registers
16218 soft frame pointer -> | | /
16219 --
16220 | | \
16221 | | local
16222 | | variables
16223 locals base pointer -> | | /
16224 --
16225 | | \
16226 | | outgoing
16227 | | arguments
16228 current stack pointer -> | | /
16229 --
16230
16231 For a given function some or all of these stack components
16232 may not be needed, giving rise to the possibility of
16233 eliminating some of the registers.
16234
16235 The values returned by this function must reflect the behavior
16236 of arm_expand_prologue() and arm_compute_save_reg_mask().
16237
16238 The sign of the number returned reflects the direction of stack
16239 growth, so the values are positive for all eliminations except
16240 from the soft frame pointer to the hard frame pointer.
16241
16242 SFP may point just inside the local variables block to ensure correct
16243 alignment. */
16244
16245
16246 /* Calculate stack offsets. These are used to calculate register elimination
16247 offsets and in prologue/epilogue code. Also calculates which registers
16248 should be saved. */
16249
16250 static arm_stack_offsets *
16251 arm_get_frame_offsets (void)
16252 {
16253 struct arm_stack_offsets *offsets;
16254 unsigned long func_type;
16255 int leaf;
16256 int saved;
16257 int core_saved;
16258 HOST_WIDE_INT frame_size;
16259 int i;
16260
16261 offsets = &cfun->machine->stack_offsets;
16262
16263 /* We need to know if we are a leaf function. Unfortunately, it
16264 is possible to be called after start_sequence has been called,
16265 which causes get_insns to return the insns for the sequence,
16266 not the function, which will cause leaf_function_p to return
16267 the incorrect result.
16268
16269      However, we do not need to know about leaf functions once reload has completed, and the
16270 frame size cannot be changed after that time, so we can safely
16271 use the cached value. */
16272
16273 if (reload_completed)
16274 return offsets;
16275
16276   /* Initially this is the size of the local variables. It will be translated
16277 into an offset once we have determined the size of preceding data. */
16278 frame_size = ROUND_UP_WORD (get_frame_size ());
16279
16280 leaf = leaf_function_p ();
16281
16282 /* Space for variadic functions. */
16283 offsets->saved_args = crtl->args.pretend_args_size;
16284
16285 /* In Thumb mode this is incorrect, but never used. */
16286 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
16287 arm_compute_static_chain_stack_bytes();
16288
16289 if (TARGET_32BIT)
16290 {
16291 unsigned int regno;
16292
16293 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
16294 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16295 saved = core_saved;
16296
16297 /* We know that SP will be doubleword aligned on entry, and we must
16298 preserve that condition at any subroutine call. We also require the
16299 soft frame pointer to be doubleword aligned. */
16300
16301 if (TARGET_REALLY_IWMMXT)
16302 {
16303 /* Check for the call-saved iWMMXt registers. */
16304 for (regno = FIRST_IWMMXT_REGNUM;
16305 regno <= LAST_IWMMXT_REGNUM;
16306 regno++)
16307 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
16308 saved += 8;
16309 }
16310
16311 func_type = arm_current_func_type ();
16312 /* Space for saved VFP registers. */
16313 if (! IS_VOLATILE (func_type)
16314 && TARGET_HARD_FLOAT && TARGET_VFP)
16315 saved += arm_get_vfp_saved_size ();
16316 }
16317 else /* TARGET_THUMB1 */
16318 {
16319 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
16320 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16321 saved = core_saved;
16322 if (TARGET_BACKTRACE)
16323 saved += 16;
16324 }
16325
16326 /* Saved registers include the stack frame. */
16327 offsets->saved_regs = offsets->saved_args + saved +
16328 arm_compute_static_chain_stack_bytes();
16329 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
16330 /* A leaf function does not need any stack alignment if it has nothing
16331 on the stack. */
16332 if (leaf && frame_size == 0
16333 /* However if it calls alloca(), we have a dynamically allocated
16334 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
16335 && ! cfun->calls_alloca)
16336 {
16337 offsets->outgoing_args = offsets->soft_frame;
16338 offsets->locals_base = offsets->soft_frame;
16339 return offsets;
16340 }
16341
16342 /* Ensure SFP has the correct alignment. */
16343 if (ARM_DOUBLEWORD_ALIGN
16344 && (offsets->soft_frame & 7))
16345 {
16346 offsets->soft_frame += 4;
16347 /* Try to align stack by pushing an extra reg. Don't bother doing this
16348 when there is a stack frame as the alignment will be rolled into
16349 the normal stack adjustment. */
16350 if (frame_size + crtl->outgoing_args_size == 0)
16351 {
16352 int reg = -1;
16353
16354 /* If it is safe to use r3, then do so. This sometimes
16355 generates better code on Thumb-2 by avoiding the need to
16356 use 32-bit push/pop instructions. */
16357 if (! any_sibcall_uses_r3 ()
16358 && arm_size_return_regs () <= 12
16359 && (offsets->saved_regs_mask & (1 << 3)) == 0)
16360 {
16361 reg = 3;
16362 }
16363 else
16364 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
16365 {
16366 /* Avoid fixed registers; they may be changed at
16367 arbitrary times so it's unsafe to restore them
16368 during the epilogue. */
16369 if (!fixed_regs[i]
16370 && (offsets->saved_regs_mask & (1 << i)) == 0)
16371 {
16372 reg = i;
16373 break;
16374 }
16375 }
16376
16377 if (reg != -1)
16378 {
16379 offsets->saved_regs += 4;
16380 offsets->saved_regs_mask |= (1 << reg);
16381 }
16382 }
16383 }
16384
16385 offsets->locals_base = offsets->soft_frame + frame_size;
16386 offsets->outgoing_args = (offsets->locals_base
16387 + crtl->outgoing_args_size);
16388
16389 if (ARM_DOUBLEWORD_ALIGN)
16390 {
16391 /* Ensure SP remains doubleword aligned. */
16392 if (offsets->outgoing_args & 7)
16393 offsets->outgoing_args += 4;
16394 gcc_assert (!(offsets->outgoing_args & 7));
16395 }
16396
16397 return offsets;
16398 }
16399
16400
16401 /* Calculate the relative offsets for the different stack pointers. Positive
16402 offsets are in the direction of stack growth. */
16403
16404 HOST_WIDE_INT
16405 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16406 {
16407 arm_stack_offsets *offsets;
16408
16409 offsets = arm_get_frame_offsets ();
16410
16411 /* OK, now we have enough information to compute the distances.
16412 There must be an entry in these switch tables for each pair
16413 of registers in ELIMINABLE_REGS, even if some of the entries
16414 seem to be redundant or useless. */
16415 switch (from)
16416 {
16417 case ARG_POINTER_REGNUM:
16418 switch (to)
16419 {
16420 case THUMB_HARD_FRAME_POINTER_REGNUM:
16421 return 0;
16422
16423 case FRAME_POINTER_REGNUM:
16424 /* This is the reverse of the soft frame pointer
16425 to hard frame pointer elimination below. */
16426 return offsets->soft_frame - offsets->saved_args;
16427
16428 case ARM_HARD_FRAME_POINTER_REGNUM:
16429 /* This is only non-zero in the case where the static chain register
16430 is stored above the frame. */
16431 return offsets->frame - offsets->saved_args - 4;
16432
16433 case STACK_POINTER_REGNUM:
16434 /* If nothing has been pushed on the stack at all
16435 then this will return -4. This *is* correct! */
16436 return offsets->outgoing_args - (offsets->saved_args + 4);
16437
16438 default:
16439 gcc_unreachable ();
16440 }
16441 gcc_unreachable ();
16442
16443 case FRAME_POINTER_REGNUM:
16444 switch (to)
16445 {
16446 case THUMB_HARD_FRAME_POINTER_REGNUM:
16447 return 0;
16448
16449 case ARM_HARD_FRAME_POINTER_REGNUM:
16450 /* The hard frame pointer points to the top entry in the
16451 stack frame. The soft frame pointer to the bottom entry
16452 in the stack frame. If there is no stack frame at all,
16453 then they are identical. */
16454
16455 return offsets->frame - offsets->soft_frame;
16456
16457 case STACK_POINTER_REGNUM:
16458 return offsets->outgoing_args - offsets->soft_frame;
16459
16460 default:
16461 gcc_unreachable ();
16462 }
16463 gcc_unreachable ();
16464
16465 default:
16466 /* You cannot eliminate from the stack pointer.
16467 In theory you could eliminate from the hard frame
16468 pointer to the stack pointer, but this will never
16469 happen, since if a stack frame is not needed the
16470 hard frame pointer will never be used. */
16471 gcc_unreachable ();
16472 }
16473 }
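
/* A worked example with made-up offsets: for a function with no pretend args
   (saved_args == 0), an APCS frame (frame == 4), 24 bytes of saved core
   registers (saved_regs == soft_frame == 24), 8 bytes of locals
   (locals_base == 32) and 8 bytes of outgoing arguments (outgoing_args == 40),
   the switch above gives

     ARG_POINTER   -> STACK_POINTER            40 - (0 + 4) = 36
     ARG_POINTER   -> ARM_HARD_FRAME_POINTER    4 - 0 - 4   = 0
     FRAME_POINTER -> STACK_POINTER            40 - 24      = 16
     FRAME_POINTER -> ARM_HARD_FRAME_POINTER    4 - 24      = -20  */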
16474
16475 /* Given FROM and TO register numbers, say whether this elimination is
16476 allowed. Frame pointer elimination is automatically handled.
16477
16478 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
16479 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
16480 pointer, we must eliminate FRAME_POINTER_REGNUM into
16481 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
16482 ARG_POINTER_REGNUM. */
16483
16484 bool
16485 arm_can_eliminate (const int from, const int to)
16486 {
16487 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
16488 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
16489 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
16490 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
16491 true);
16492 }
16493
16494 /* Emit RTL to save coprocessor registers on function entry. Returns the
16495 number of bytes pushed. */
16496
16497 static int
16498 arm_save_coproc_regs(void)
16499 {
16500 int saved_size = 0;
16501 unsigned reg;
16502 unsigned start_reg;
16503 rtx insn;
16504
16505 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
16506 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
16507 {
16508 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16509 insn = gen_rtx_MEM (V2SImode, insn);
16510 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
16511 RTX_FRAME_RELATED_P (insn) = 1;
16512 saved_size += 8;
16513 }
16514
16515 if (TARGET_HARD_FLOAT && TARGET_VFP)
16516 {
16517 start_reg = FIRST_VFP_REGNUM;
16518
16519 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
16520 {
16521 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
16522 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
16523 {
16524 if (start_reg != reg)
16525 saved_size += vfp_emit_fstmd (start_reg,
16526 (reg - start_reg) / 2);
16527 start_reg = reg + 2;
16528 }
16529 }
16530 if (start_reg != reg)
16531 saved_size += vfp_emit_fstmd (start_reg,
16532 (reg - start_reg) / 2);
16533 }
16534 return saved_size;
16535 }
16536
16537
16538 /* Set the Thumb frame pointer from the stack pointer. */
16539
16540 static void
16541 thumb_set_frame_pointer (arm_stack_offsets *offsets)
16542 {
16543 HOST_WIDE_INT amount;
16544 rtx insn, dwarf;
16545
16546 amount = offsets->outgoing_args - offsets->locals_base;
16547 if (amount < 1024)
16548 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16549 stack_pointer_rtx, GEN_INT (amount)));
16550 else
16551 {
16552 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
16553 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
16554 expects the first two operands to be the same. */
16555 if (TARGET_THUMB2)
16556 {
16557 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16558 stack_pointer_rtx,
16559 hard_frame_pointer_rtx));
16560 }
16561 else
16562 {
16563 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16564 hard_frame_pointer_rtx,
16565 stack_pointer_rtx));
16566 }
16567 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
16568 plus_constant (Pmode, stack_pointer_rtx, amount));
16569 RTX_FRAME_RELATED_P (dwarf) = 1;
16570 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16571 }
16572
16573 RTX_FRAME_RELATED_P (insn) = 1;
16574 }
16575
16576 /* Generate the prologue instructions for entry into an ARM or Thumb-2
16577 function. */
16578 void
16579 arm_expand_prologue (void)
16580 {
16581 rtx amount;
16582 rtx insn;
16583 rtx ip_rtx;
16584 unsigned long live_regs_mask;
16585 unsigned long func_type;
16586 int fp_offset = 0;
16587 int saved_pretend_args = 0;
16588 int saved_regs = 0;
16589 unsigned HOST_WIDE_INT args_to_push;
16590 arm_stack_offsets *offsets;
16591
16592 func_type = arm_current_func_type ();
16593
16594 /* Naked functions don't have prologues. */
16595 if (IS_NAKED (func_type))
16596 return;
16597
16598 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
16599 args_to_push = crtl->args.pretend_args_size;
16600
16601 /* Compute which registers we will have to save onto the stack. */
16602 offsets = arm_get_frame_offsets ();
16603 live_regs_mask = offsets->saved_regs_mask;
16604
16605 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
16606
16607 if (IS_STACKALIGN (func_type))
16608 {
16609 rtx r0, r1;
16610
16611 /* Handle a word-aligned stack pointer. We generate the following:
16612
16613 mov r0, sp
16614 bic r1, r0, #7
16615 mov sp, r1
16616 <save and restore r0 in normal prologue/epilogue>
16617 mov sp, r0
16618 bx lr
16619
16620 The unwinder doesn't need to know about the stack realignment.
16621 Just tell it we saved SP in r0. */
16622 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
16623
16624 r0 = gen_rtx_REG (SImode, 0);
16625 r1 = gen_rtx_REG (SImode, 1);
16626
16627 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
16628 RTX_FRAME_RELATED_P (insn) = 1;
16629 add_reg_note (insn, REG_CFA_REGISTER, NULL);
16630
16631 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
16632
16633 /* ??? The CFA changes here, which may cause GDB to conclude that it
16634 has entered a different function. That said, the unwind info is
16635 correct, individually, before and after this instruction because
16636 we've described the save of SP, which will override the default
16637 handling of SP as restoring from the CFA. */
16638 emit_insn (gen_movsi (stack_pointer_rtx, r1));
16639 }
16640
16641 /* For APCS frames, if the IP register is clobbered
16642 when creating the frame, save that register in a special
16643 way. */
16644 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
16645 {
16646 if (IS_INTERRUPT (func_type))
16647 {
16648 /* Interrupt functions must not corrupt any registers.
16649 Creating a frame pointer however, corrupts the IP
16650 register, so we must push it first. */
16651 emit_multi_reg_push (1 << IP_REGNUM);
16652
16653 /* Do not set RTX_FRAME_RELATED_P on this insn.
16654 The dwarf stack unwinding code only wants to see one
16655 stack decrement per function, and this is not it. If
16656 this instruction is labeled as being part of the frame
16657 creation sequence then dwarf2out_frame_debug_expr will
16658 die when it encounters the assignment of IP to FP
16659 later on, since the use of SP here establishes SP as
16660 the CFA register and not IP.
16661
16662 Anyway this instruction is not really part of the stack
16663 frame creation although it is part of the prologue. */
16664 }
16665 else if (IS_NESTED (func_type))
16666 {
16667 /* The static chain register is the same as the IP register,
16668 which is used as a scratch register during stack frame creation.
16669 To get around this, we need to find somewhere to store IP
16670 whilst the frame is being created. We try the following
16671 places in order:
16672
16673 1. The last argument register.
16674 2. A slot on the stack above the frame. (This only
16675 works if the function is not a varargs function).
16676 3. Register r3, after pushing the argument registers
16677 onto the stack.
16678
16679 Note - we only need to tell the dwarf2 backend about the SP
16680 adjustment in the second variant; the static chain register
16681 doesn't need to be unwound, as it doesn't contain a value
16682 inherited from the caller. */
16683
16684 if (df_regs_ever_live_p (3) == false)
16685 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16686 else if (args_to_push == 0)
16687 {
16688 rtx dwarf;
16689
16690 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
16691 saved_regs += 4;
16692
16693 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
16694 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
16695 fp_offset = 4;
16696
16697 /* Just tell the dwarf backend that we adjusted SP. */
16698 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16699 plus_constant (Pmode, stack_pointer_rtx,
16700 -fp_offset));
16701 RTX_FRAME_RELATED_P (insn) = 1;
16702 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16703 }
16704 else
16705 {
16706 /* Store the args on the stack. */
16707 if (cfun->machine->uses_anonymous_args)
16708 insn = emit_multi_reg_push
16709 ((0xf0 >> (args_to_push / 4)) & 0xf);
16710 else
16711 insn = emit_insn
16712 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16713 GEN_INT (- args_to_push)));
16714
16715 RTX_FRAME_RELATED_P (insn) = 1;
16716
16717 saved_pretend_args = 1;
16718 fp_offset = args_to_push;
16719 args_to_push = 0;
16720
16721 /* Now reuse r3 to preserve IP. */
16722 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16723 }
16724 }
16725
16726 insn = emit_set_insn (ip_rtx,
16727 plus_constant (Pmode, stack_pointer_rtx,
16728 fp_offset));
16729 RTX_FRAME_RELATED_P (insn) = 1;
16730 }
16731
16732 if (args_to_push)
16733 {
16734 /* Push the argument registers, or reserve space for them. */
16735 if (cfun->machine->uses_anonymous_args)
16736 insn = emit_multi_reg_push
16737 ((0xf0 >> (args_to_push / 4)) & 0xf);
16738 else
16739 insn = emit_insn
16740 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16741 GEN_INT (- args_to_push)));
16742 RTX_FRAME_RELATED_P (insn) = 1;
16743 }
16744
16745 /* If this is an interrupt service routine, and the link register
16746 is going to be pushed, and we're not generating the extra
16747 push of IP (needed when a frame is needed and the frame layout is APCS),
16748 then subtracting four from LR now will mean that the function return
16749 can be done with a single instruction. */
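/* Illustrative sketch (not from the sources): with LR pre-adjusted this
   way, an ISR that entered with "stmfd sp!, {r0-r3, lr}" can return with
   the single instruction "ldmfd sp!, {r0-r3, pc}^", instead of needing a
   separate "subs pc, lr, #4".  */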
16750 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
16751 && (live_regs_mask & (1 << LR_REGNUM)) != 0
16752 && !(frame_pointer_needed && TARGET_APCS_FRAME)
16753 && TARGET_ARM)
16754 {
16755 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
16756
16757 emit_set_insn (lr, plus_constant (SImode, lr, -4));
16758 }
16759
16760 if (live_regs_mask)
16761 {
16762 saved_regs += bit_count (live_regs_mask) * 4;
16763 if (optimize_size && !frame_pointer_needed
16764 && saved_regs == offsets->saved_regs - offsets->saved_args)
16765 {
16766 /* If no coprocessor registers are being pushed and we don't have
16767 to worry about a frame pointer then push extra registers to
16768 create the stack frame. This is done in a way that does not
16769 alter the frame layout, so is independent of the epilogue. */
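/* Worked example (hypothetical): if the lowest register already being
   pushed is r4 (so n == 4) and the frame needs another 8 bytes
   (frame == 8), then r0 and r1 are added to the push mask, the existing
   stmfd allocates those 8 bytes, and the later "sub sp, sp, #8"
   disappears.  */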
16770 int n;
16771 int frame;
16772 n = 0;
16773 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
16774 n++;
16775 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
16776 if (frame && n * 4 >= frame)
16777 {
16778 n = frame / 4;
16779 live_regs_mask |= (1 << n) - 1;
16780 saved_regs += frame;
16781 }
16782 }
16783 insn = emit_multi_reg_push (live_regs_mask);
16784 RTX_FRAME_RELATED_P (insn) = 1;
16785 }
16786
16787 if (! IS_VOLATILE (func_type))
16788 saved_regs += arm_save_coproc_regs ();
16789
16790 if (frame_pointer_needed && TARGET_ARM)
16791 {
16792 /* Create the new frame pointer. */
16793 if (TARGET_APCS_FRAME)
16794 {
16795 insn = GEN_INT (-(4 + args_to_push + fp_offset));
16796 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
16797 RTX_FRAME_RELATED_P (insn) = 1;
16798
16799 if (IS_NESTED (func_type))
16800 {
16801 /* Recover the static chain register. */
16802 if (!df_regs_ever_live_p (3)
16803 || saved_pretend_args)
16804 insn = gen_rtx_REG (SImode, 3);
16805 else /* if (crtl->args.pretend_args_size == 0) */
16806 {
16807 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
16808 insn = gen_frame_mem (SImode, insn);
16809 }
16810 emit_set_insn (ip_rtx, insn);
16811 /* Add a USE to stop propagate_one_insn() from barfing. */
16812 emit_insn (gen_force_register_use (ip_rtx));
16813 }
16814 }
16815 else
16816 {
16817 insn = GEN_INT (saved_regs - 4);
16818 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16819 stack_pointer_rtx, insn));
16820 RTX_FRAME_RELATED_P (insn) = 1;
16821 }
16822 }
16823
16824 if (flag_stack_usage_info)
16825 current_function_static_stack_size
16826 = offsets->outgoing_args - offsets->saved_args;
16827
16828 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
16829 {
16830 /* This add can produce multiple insns for a large constant, so we
16831 need to get tricky. */
16832 rtx last = get_last_insn ();
16833
16834 amount = GEN_INT (offsets->saved_args + saved_regs
16835 - offsets->outgoing_args);
16836
16837 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16838 amount));
16839 do
16840 {
16841 last = last ? NEXT_INSN (last) : get_insns ();
16842 RTX_FRAME_RELATED_P (last) = 1;
16843 }
16844 while (last != insn);
16845
16846 /* If the frame pointer is needed, emit a special barrier that
16847 will prevent the scheduler from moving stores to the frame
16848 before the stack adjustment. */
16849 if (frame_pointer_needed)
16850 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
16851 hard_frame_pointer_rtx));
16852 }
16853
16854
16855 if (frame_pointer_needed && TARGET_THUMB2)
16856 thumb_set_frame_pointer (offsets);
16857
16858 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16859 {
16860 unsigned long mask;
16861
16862 mask = live_regs_mask;
16863 mask &= THUMB2_WORK_REGS;
16864 if (!IS_NESTED (func_type))
16865 mask |= (1 << IP_REGNUM);
16866 arm_load_pic_register (mask);
16867 }
16868
16869 /* If we are profiling, make sure no instructions are scheduled before
16870 the call to mcount. Similarly if the user has requested no
16871 scheduling in the prolog. Similarly if we want non-call exceptions
16872 using the EABI unwinder, to prevent faulting instructions from being
16873 swapped with a stack adjustment. */
16874 if (crtl->profile || !TARGET_SCHED_PROLOG
16875 || (arm_except_unwind_info (&global_options) == UI_TARGET
16876 && cfun->can_throw_non_call_exceptions))
16877 emit_insn (gen_blockage ());
16878
16879 /* If the link register is being kept alive, with the return address in it,
16880 then make sure that it does not get reused by the ce2 pass. */
16881 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
16882 cfun->machine->lr_save_eliminated = 1;
16883 }
16884 \f
16885 /* Print condition code to STREAM. Helper function for arm_print_operand. */
16886 static void
16887 arm_print_condition (FILE *stream)
16888 {
16889 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
16890 {
16891 /* Branch conversion is not implemented for Thumb-2. */
16892 if (TARGET_THUMB)
16893 {
16894 output_operand_lossage ("predicated Thumb instruction");
16895 return;
16896 }
16897 if (current_insn_predicate != NULL)
16898 {
16899 output_operand_lossage
16900 ("predicated instruction in conditional sequence");
16901 return;
16902 }
16903
16904 fputs (arm_condition_codes[arm_current_cc], stream);
16905 }
16906 else if (current_insn_predicate)
16907 {
16908 enum arm_cond_code code;
16909
16910 if (TARGET_THUMB1)
16911 {
16912 output_operand_lossage ("predicated Thumb instruction");
16913 return;
16914 }
16915
16916 code = get_arm_condition_code (current_insn_predicate);
16917 fputs (arm_condition_codes[code], stream);
16918 }
16919 }
16920
16921
16922 /* If CODE is 'd', then X is a condition operand and the instruction
16923 should only be executed if the condition is true.
16924 If CODE is 'D', then X is a condition operand and the instruction
16925 should only be executed if the condition is false: however, if the mode
16926 of the comparison is CCFPEmode, then always execute the instruction -- we
16927 do this because in these circumstances !GE does not necessarily imply LT;
16928 in these cases the instruction pattern will take care to make sure that
16929 an instruction containing %d will follow, thereby undoing the effects of
16930 doing this instruction unconditionally.
16931 If CODE is 'N' then X is a floating point operand that must be negated
16932 before output.
16933 If CODE is 'B' then output a bitwise inverted value of X (a const int).
16934 If X is a REG and CODE is `M', output an ldm/stm style multi-reg. */
16935 static void
16936 arm_print_operand (FILE *stream, rtx x, int code)
16937 {
16938 switch (code)
16939 {
16940 case '@':
16941 fputs (ASM_COMMENT_START, stream);
16942 return;
16943
16944 case '_':
16945 fputs (user_label_prefix, stream);
16946 return;
16947
16948 case '|':
16949 fputs (REGISTER_PREFIX, stream);
16950 return;
16951
16952 case '?':
16953 arm_print_condition (stream);
16954 return;
16955
16956 case '(':
16957 /* Nothing in unified syntax, otherwise the current condition code. */
16958 if (!TARGET_UNIFIED_ASM)
16959 arm_print_condition (stream);
16960 break;
16961
16962 case ')':
16963 /* The current condition code in unified syntax, otherwise nothing. */
16964 if (TARGET_UNIFIED_ASM)
16965 arm_print_condition (stream);
16966 break;
16967
16968 case '.':
16969 /* The current condition code for a condition code setting instruction.
16970 Preceded by 's' in unified syntax, otherwise followed by 's'. */
16971 if (TARGET_UNIFIED_ASM)
16972 {
16973 fputc('s', stream);
16974 arm_print_condition (stream);
16975 }
16976 else
16977 {
16978 arm_print_condition (stream);
16979 fputc('s', stream);
16980 }
16981 return;
16982
16983 case '!':
16984 /* If the instruction is conditionally executed then print
16985 the current condition code, otherwise print 's'. */
16986 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
16987 if (current_insn_predicate)
16988 arm_print_condition (stream);
16989 else
16990 fputc('s', stream);
16991 break;
16992
16993 /* %# is a "break" sequence. It doesn't output anything, but is used to
16994 separate e.g. operand numbers from following text, if that text consists
16995 of further digits which we don't want to be part of the operand
16996 number. */
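/* For instance, an output template can write "%0%#1" to print operand 0
   immediately followed by a literal '1'; without the %# the "01" would be
   read as a single operand number.  */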
16997 case '#':
16998 return;
16999
17000 case 'N':
17001 {
17002 REAL_VALUE_TYPE r;
17003 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17004 r = real_value_negate (&r);
17005 fprintf (stream, "%s", fp_const_from_val (&r));
17006 }
17007 return;
17008
17009 /* An integer or symbol address without a preceding # sign. */
17010 case 'c':
17011 switch (GET_CODE (x))
17012 {
17013 case CONST_INT:
17014 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
17015 break;
17016
17017 case SYMBOL_REF:
17018 output_addr_const (stream, x);
17019 break;
17020
17021 case CONST:
17022 if (GET_CODE (XEXP (x, 0)) == PLUS
17023 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
17024 {
17025 output_addr_const (stream, x);
17026 break;
17027 }
17028 /* Fall through. */
17029
17030 default:
17031 output_operand_lossage ("Unsupported operand for code '%c'", code);
17032 }
17033 return;
17034
17035 /* An integer that we want to print in HEX. */
17036 case 'x':
17037 switch (GET_CODE (x))
17038 {
17039 case CONST_INT:
17040 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17041 break;
17042
17043 default:
17044 output_operand_lossage ("Unsupported operand for code '%c'", code);
17045 }
17046 return;
17047
17048 case 'B':
17049 if (CONST_INT_P (x))
17050 {
17051 HOST_WIDE_INT val;
17052 val = ARM_SIGN_EXTEND (~INTVAL (x));
17053 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
17054 }
17055 else
17056 {
17057 putc ('~', stream);
17058 output_addr_const (stream, x);
17059 }
17060 return;
17061
17062 case 'L':
17063 /* The low 16 bits of an immediate constant. */
17064 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
17065 return;
17066
17067 case 'i':
17068 fprintf (stream, "%s", arithmetic_instr (x, 1));
17069 return;
17070
17071 case 'I':
17072 fprintf (stream, "%s", arithmetic_instr (x, 0));
17073 return;
17074
17075 case 'S':
17076 {
17077 HOST_WIDE_INT val;
17078 const char *shift;
17079
17080 if (!shift_operator (x, SImode))
17081 {
17082 output_operand_lossage ("invalid shift operand");
17083 break;
17084 }
17085
17086 shift = shift_op (x, &val);
17087
17088 if (shift)
17089 {
17090 fprintf (stream, ", %s ", shift);
17091 if (val == -1)
17092 arm_print_operand (stream, XEXP (x, 1), 0);
17093 else
17094 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
17095 }
17096 }
17097 return;
17098
17099 /* An explanation of the 'Q', 'R' and 'H' register operands:
17100
17101 In a pair of registers containing a DI or DF value the 'Q'
17102 operand returns the register number of the register containing
17103 the least significant part of the value. The 'R' operand returns
17104 the register number of the register containing the most
17105 significant part of the value.
17106
17107 The 'H' operand returns the higher of the two register numbers.
17108 On a target where WORDS_BIG_ENDIAN is true the 'H' operand is the
17109 same as the 'Q' operand, since the most significant part of the
17110 value is held in the lower-numbered register. The reverse is true
17111 on systems where WORDS_BIG_ENDIAN is false.
17112
17113 The purpose of these operands is to distinguish between cases
17114 where the endian-ness of the values is important (for example
17115 when they are added together), and cases where the endian-ness
17116 is irrelevant, but the order of register operations is important.
17117 For example when loading a value from memory into a register
17118 pair, the endian-ness does not matter. Provided that the value
17119 from the lower memory address is put into the lower numbered
17120 register, and the value from the higher address is put into the
17121 higher numbered register, the load will work regardless of whether
17122 the value being loaded is big-wordian or little-wordian. The
17123 order of the two register loads can matter however, if the address
17124 of the memory location is actually held in one of the registers
17125 being overwritten by the load.
17126
17127 The 'Q' and 'R' constraints are also available for 64-bit
17128 constants. */
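/* For illustration: a DImode value held in {r0, r1} on a little-endian
   target has its least significant word in r0, so %Q prints r0, %R
   prints r1 and %H prints r1; on a WORDS_BIG_ENDIAN target %Q and %H
   both print r1 while %R prints r0.  */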
17129 case 'Q':
17130 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
17131 {
17132 rtx part = gen_lowpart (SImode, x);
17133 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17134 return;
17135 }
17136
17137 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17138 {
17139 output_operand_lossage ("invalid operand for code '%c'", code);
17140 return;
17141 }
17142
17143 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
17144 return;
17145
17146 case 'R':
17147 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
17148 {
17149 enum machine_mode mode = GET_MODE (x);
17150 rtx part;
17151
17152 if (mode == VOIDmode)
17153 mode = DImode;
17154 part = gen_highpart_mode (SImode, mode, x);
17155 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17156 return;
17157 }
17158
17159 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17160 {
17161 output_operand_lossage ("invalid operand for code '%c'", code);
17162 return;
17163 }
17164
17165 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
17166 return;
17167
17168 case 'H':
17169 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17170 {
17171 output_operand_lossage ("invalid operand for code '%c'", code);
17172 return;
17173 }
17174
17175 asm_fprintf (stream, "%r", REGNO (x) + 1);
17176 return;
17177
17178 case 'J':
17179 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17180 {
17181 output_operand_lossage ("invalid operand for code '%c'", code);
17182 return;
17183 }
17184
17185 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
17186 return;
17187
17188 case 'K':
17189 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17190 {
17191 output_operand_lossage ("invalid operand for code '%c'", code);
17192 return;
17193 }
17194
17195 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
17196 return;
17197
17198 case 'm':
17199 asm_fprintf (stream, "%r",
17200 REG_P (XEXP (x, 0))
17201 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
17202 return;
17203
17204 case 'M':
17205 asm_fprintf (stream, "{%r-%r}",
17206 REGNO (x),
17207 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
17208 return;
17209
17210 /* Like 'M', but writing doubleword vector registers, for use by Neon
17211 insns. */
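/* For example, a 16-byte vector held in q0 (the d0/d1 pair) is printed
   by this code as "{d0-d1}".  */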
17212 case 'h':
17213 {
17214 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
17215 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
17216 if (numregs == 1)
17217 asm_fprintf (stream, "{d%d}", regno);
17218 else
17219 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
17220 }
17221 return;
17222
17223 case 'd':
17224 /* CONST_TRUE_RTX means always -- that's the default. */
17225 if (x == const_true_rtx)
17226 return;
17227
17228 if (!COMPARISON_P (x))
17229 {
17230 output_operand_lossage ("invalid operand for code '%c'", code);
17231 return;
17232 }
17233
17234 fputs (arm_condition_codes[get_arm_condition_code (x)],
17235 stream);
17236 return;
17237
17238 case 'D':
17239 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
17240 want to do that. */
17241 if (x == const_true_rtx)
17242 {
17243 output_operand_lossage ("instruction never executed");
17244 return;
17245 }
17246 if (!COMPARISON_P (x))
17247 {
17248 output_operand_lossage ("invalid operand for code '%c'", code);
17249 return;
17250 }
17251
17252 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
17253 (get_arm_condition_code (x))],
17254 stream);
17255 return;
17256
17257 case 's':
17258 case 'V':
17259 case 'W':
17260 case 'X':
17261 case 'Y':
17262 case 'Z':
17263 /* Former Maverick support, removed after GCC-4.7. */
17264 output_operand_lossage ("obsolete Maverick format code '%c'", code);
17265 return;
17266
17267 case 'U':
17268 if (!REG_P (x)
17269 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
17270 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
17271 /* Bad value for wCG register number. */
17272 {
17273 output_operand_lossage ("invalid operand for code '%c'", code);
17274 return;
17275 }
17276
17277 else
17278 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
17279 return;
17280
17281 /* Print an iWMMXt control register name. */
17282 case 'w':
17283 if (!CONST_INT_P (x)
17284 || INTVAL (x) < 0
17285 || INTVAL (x) >= 16)
17286 /* Bad value for wC register number. */
17287 {
17288 output_operand_lossage ("invalid operand for code '%c'", code);
17289 return;
17290 }
17291
17292 else
17293 {
17294 static const char * wc_reg_names [16] =
17295 {
17296 "wCID", "wCon", "wCSSF", "wCASF",
17297 "wC4", "wC5", "wC6", "wC7",
17298 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
17299 "wC12", "wC13", "wC14", "wC15"
17300 };
17301
17302 fprintf (stream, wc_reg_names [INTVAL (x)]);
17303 }
17304 return;
17305
17306 /* Print the high single-precision register of a VFP double-precision
17307 register. */
17308 case 'p':
17309 {
17310 int mode = GET_MODE (x);
17311 int regno;
17312
17313 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
17314 {
17315 output_operand_lossage ("invalid operand for code '%c'", code);
17316 return;
17317 }
17318
17319 regno = REGNO (x);
17320 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
17321 {
17322 output_operand_lossage ("invalid operand for code '%c'", code);
17323 return;
17324 }
17325
17326 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
17327 }
17328 return;
17329
17330 /* Print a VFP/Neon double precision or quad precision register name. */
17331 case 'P':
17332 case 'q':
17333 {
17334 int mode = GET_MODE (x);
17335 int is_quad = (code == 'q');
17336 int regno;
17337
17338 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
17339 {
17340 output_operand_lossage ("invalid operand for code '%c'", code);
17341 return;
17342 }
17343
17344 if (!REG_P (x)
17345 || !IS_VFP_REGNUM (REGNO (x)))
17346 {
17347 output_operand_lossage ("invalid operand for code '%c'", code);
17348 return;
17349 }
17350
17351 regno = REGNO (x);
17352 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
17353 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
17354 {
17355 output_operand_lossage ("invalid operand for code '%c'", code);
17356 return;
17357 }
17358
17359 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
17360 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
17361 }
17362 return;
17363
17364 /* These two codes print the low/high doubleword register of a Neon quad
17365 register, respectively. For pair-structure types, can also print
17366 low/high quadword registers. */
17367 case 'e':
17368 case 'f':
17369 {
17370 int mode = GET_MODE (x);
17371 int regno;
17372
17373 if ((GET_MODE_SIZE (mode) != 16
17374 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
17375 {
17376 output_operand_lossage ("invalid operand for code '%c'", code);
17377 return;
17378 }
17379
17380 regno = REGNO (x);
17381 if (!NEON_REGNO_OK_FOR_QUAD (regno))
17382 {
17383 output_operand_lossage ("invalid operand for code '%c'", code);
17384 return;
17385 }
17386
17387 if (GET_MODE_SIZE (mode) == 16)
17388 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
17389 + (code == 'f' ? 1 : 0));
17390 else
17391 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
17392 + (code == 'f' ? 1 : 0));
17393 }
17394 return;
17395
17396 /* Print a VFPv3 floating-point constant, represented as an integer
17397 index. */
17398 case 'G':
17399 {
17400 int index = vfp3_const_double_index (x);
17401 gcc_assert (index != -1);
17402 fprintf (stream, "%d", index);
17403 }
17404 return;
17405
17406 /* Print bits representing opcode features for Neon.
17407
17408 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
17409 and polynomials as unsigned.
17410
17411 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
17412
17413 Bit 2 is 1 for rounding functions, 0 otherwise. */
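/* For example, an operand value of 5 (binary 101) describes a signed,
   rounding operation: %T prints 's', %F prints 'i', %t prints 's' and
   %O prints 'r'.  */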
17414
17415 /* Identify the type as 's', 'u', 'p' or 'f'. */
17416 case 'T':
17417 {
17418 HOST_WIDE_INT bits = INTVAL (x);
17419 fputc ("uspf"[bits & 3], stream);
17420 }
17421 return;
17422
17423 /* Likewise, but signed and unsigned integers are both 'i'. */
17424 case 'F':
17425 {
17426 HOST_WIDE_INT bits = INTVAL (x);
17427 fputc ("iipf"[bits & 3], stream);
17428 }
17429 return;
17430
17431 /* As for 'T', but emit 'u' instead of 'p'. */
17432 case 't':
17433 {
17434 HOST_WIDE_INT bits = INTVAL (x);
17435 fputc ("usuf"[bits & 3], stream);
17436 }
17437 return;
17438
17439 /* Bit 2: rounding (vs none). */
17440 case 'O':
17441 {
17442 HOST_WIDE_INT bits = INTVAL (x);
17443 fputs ((bits & 4) != 0 ? "r" : "", stream);
17444 }
17445 return;
17446
17447 /* Memory operand for vld1/vst1 instruction. */
17448 case 'A':
17449 {
17450 rtx addr;
17451 bool postinc = FALSE;
17452 unsigned align, memsize, align_bits;
17453
17454 gcc_assert (MEM_P (x));
17455 addr = XEXP (x, 0);
17456 if (GET_CODE (addr) == POST_INC)
17457 {
17458 postinc = 1;
17459 addr = XEXP (addr, 0);
17460 }
17461 asm_fprintf (stream, "[%r", REGNO (addr));
17462
17463 /* We know the alignment of this access, so we can emit a hint in the
17464 instruction (for some alignments) as an aid to the memory subsystem
17465 of the target. */
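/* E.g. a 16-byte access known to be 128-bit aligned is printed with an
   alignment hint, as in "[r0:128]".  */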
17466 align = MEM_ALIGN (x) >> 3;
17467 memsize = MEM_SIZE (x);
17468
17469 /* Only certain alignment specifiers are supported by the hardware. */
17470 if (memsize == 32 && (align % 32) == 0)
17471 align_bits = 256;
17472 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
17473 align_bits = 128;
17474 else if (memsize >= 8 && (align % 8) == 0)
17475 align_bits = 64;
17476 else
17477 align_bits = 0;
17478
17479 if (align_bits != 0)
17480 asm_fprintf (stream, ":%d", align_bits);
17481
17482 asm_fprintf (stream, "]");
17483
17484 if (postinc)
17485 fputs("!", stream);
17486 }
17487 return;
17488
17489 case 'C':
17490 {
17491 rtx addr;
17492
17493 gcc_assert (MEM_P (x));
17494 addr = XEXP (x, 0);
17495 gcc_assert (REG_P (addr));
17496 asm_fprintf (stream, "[%r]", REGNO (addr));
17497 }
17498 return;
17499
17500 /* Translate an S register number into a D register number and element index. */
17501 case 'y':
17502 {
17503 int mode = GET_MODE (x);
17504 int regno;
17505
17506 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
17507 {
17508 output_operand_lossage ("invalid operand for code '%c'", code);
17509 return;
17510 }
17511
17512 regno = REGNO (x);
17513 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17514 {
17515 output_operand_lossage ("invalid operand for code '%c'", code);
17516 return;
17517 }
17518
17519 regno = regno - FIRST_VFP_REGNUM;
17520 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
17521 }
17522 return;
17523
17524 case 'v':
17525 gcc_assert (CONST_DOUBLE_P (x));
17526 fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
17527 return;
17528
17529 /* Register specifier for vld1.16/vst1.16. Translate the S register
17530 number into a D register number and element index. */
17531 case 'z':
17532 {
17533 int mode = GET_MODE (x);
17534 int regno;
17535
17536 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
17537 {
17538 output_operand_lossage ("invalid operand for code '%c'", code);
17539 return;
17540 }
17541
17542 regno = REGNO (x);
17543 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17544 {
17545 output_operand_lossage ("invalid operand for code '%c'", code);
17546 return;
17547 }
17548
17549 regno = regno - FIRST_VFP_REGNUM;
17550 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
17551 }
17552 return;
17553
17554 default:
17555 if (x == 0)
17556 {
17557 output_operand_lossage ("missing operand");
17558 return;
17559 }
17560
17561 switch (GET_CODE (x))
17562 {
17563 case REG:
17564 asm_fprintf (stream, "%r", REGNO (x));
17565 break;
17566
17567 case MEM:
17568 output_memory_reference_mode = GET_MODE (x);
17569 output_address (XEXP (x, 0));
17570 break;
17571
17572 case CONST_DOUBLE:
17573 if (TARGET_NEON)
17574 {
17575 char fpstr[20];
17576 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17577 sizeof (fpstr), 0, 1);
17578 fprintf (stream, "#%s", fpstr);
17579 }
17580 else
17581 fprintf (stream, "#%s", fp_immediate_constant (x));
17582 break;
17583
17584 default:
17585 gcc_assert (GET_CODE (x) != NEG);
17586 fputc ('#', stream);
17587 if (GET_CODE (x) == HIGH)
17588 {
17589 fputs (":lower16:", stream);
17590 x = XEXP (x, 0);
17591 }
17592
17593 output_addr_const (stream, x);
17594 break;
17595 }
17596 }
17597 }
17598 \f
17599 /* Target hook for printing a memory address. */
17600 static void
17601 arm_print_operand_address (FILE *stream, rtx x)
17602 {
17603 if (TARGET_32BIT)
17604 {
17605 int is_minus = GET_CODE (x) == MINUS;
17606
17607 if (REG_P (x))
17608 asm_fprintf (stream, "[%r]", REGNO (x));
17609 else if (GET_CODE (x) == PLUS || is_minus)
17610 {
17611 rtx base = XEXP (x, 0);
17612 rtx index = XEXP (x, 1);
17613 HOST_WIDE_INT offset = 0;
17614 if (!REG_P (base)
17615 || (REG_P (index) && REGNO (index) == SP_REGNUM))
17616 {
17617 /* Ensure that BASE is a register. */
17618 /* (one of them must be). */
17619 /* Also ensure the SP is not used as an index register. */
17620 rtx temp = base;
17621 base = index;
17622 index = temp;
17623 }
17624 switch (GET_CODE (index))
17625 {
17626 case CONST_INT:
17627 offset = INTVAL (index);
17628 if (is_minus)
17629 offset = -offset;
17630 asm_fprintf (stream, "[%r, #%wd]",
17631 REGNO (base), offset);
17632 break;
17633
17634 case REG:
17635 asm_fprintf (stream, "[%r, %s%r]",
17636 REGNO (base), is_minus ? "-" : "",
17637 REGNO (index));
17638 break;
17639
17640 case MULT:
17641 case ASHIFTRT:
17642 case LSHIFTRT:
17643 case ASHIFT:
17644 case ROTATERT:
17645 {
17646 asm_fprintf (stream, "[%r, %s%r",
17647 REGNO (base), is_minus ? "-" : "",
17648 REGNO (XEXP (index, 0)));
17649 arm_print_operand (stream, index, 'S');
17650 fputs ("]", stream);
17651 break;
17652 }
17653
17654 default:
17655 gcc_unreachable ();
17656 }
17657 }
17658 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
17659 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
17660 {
17661 extern enum machine_mode output_memory_reference_mode;
17662
17663 gcc_assert (REG_P (XEXP (x, 0)));
17664
17665 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
17666 asm_fprintf (stream, "[%r, #%s%d]!",
17667 REGNO (XEXP (x, 0)),
17668 GET_CODE (x) == PRE_DEC ? "-" : "",
17669 GET_MODE_SIZE (output_memory_reference_mode));
17670 else
17671 asm_fprintf (stream, "[%r], #%s%d",
17672 REGNO (XEXP (x, 0)),
17673 GET_CODE (x) == POST_DEC ? "-" : "",
17674 GET_MODE_SIZE (output_memory_reference_mode));
17675 }
17676 else if (GET_CODE (x) == PRE_MODIFY)
17677 {
17678 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
17679 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
17680 asm_fprintf (stream, "#%wd]!",
17681 INTVAL (XEXP (XEXP (x, 1), 1)));
17682 else
17683 asm_fprintf (stream, "%r]!",
17684 REGNO (XEXP (XEXP (x, 1), 1)));
17685 }
17686 else if (GET_CODE (x) == POST_MODIFY)
17687 {
17688 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
17689 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
17690 asm_fprintf (stream, "#%wd",
17691 INTVAL (XEXP (XEXP (x, 1), 1)));
17692 else
17693 asm_fprintf (stream, "%r",
17694 REGNO (XEXP (XEXP (x, 1), 1)));
17695 }
17696 else output_addr_const (stream, x);
17697 }
17698 else
17699 {
17700 if (REG_P (x))
17701 asm_fprintf (stream, "[%r]", REGNO (x));
17702 else if (GET_CODE (x) == POST_INC)
17703 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
17704 else if (GET_CODE (x) == PLUS)
17705 {
17706 gcc_assert (REG_P (XEXP (x, 0)));
17707 if (CONST_INT_P (XEXP (x, 1)))
17708 asm_fprintf (stream, "[%r, #%wd]",
17709 REGNO (XEXP (x, 0)),
17710 INTVAL (XEXP (x, 1)));
17711 else
17712 asm_fprintf (stream, "[%r, %r]",
17713 REGNO (XEXP (x, 0)),
17714 REGNO (XEXP (x, 1)));
17715 }
17716 else
17717 output_addr_const (stream, x);
17718 }
17719 }
17720 \f
17721 /* Target hook for indicating whether a punctuation character for
17722 TARGET_PRINT_OPERAND is valid. */
17723 static bool
17724 arm_print_operand_punct_valid_p (unsigned char code)
17725 {
17726 return (code == '@' || code == '|' || code == '.'
17727 || code == '(' || code == ')' || code == '#'
17728 || (TARGET_32BIT && (code == '?'))
17729 || (TARGET_THUMB2 && (code == '!'))
17730 || (TARGET_THUMB && (code == '_')));
17731 }
17732 \f
17733 /* Target hook for assembling integer objects. The ARM version needs to
17734 handle word-sized values specially. */
17735 static bool
17736 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
17737 {
17738 enum machine_mode mode;
17739
17740 if (size == UNITS_PER_WORD && aligned_p)
17741 {
17742 fputs ("\t.word\t", asm_out_file);
17743 output_addr_const (asm_out_file, x);
17744
17745 /* Mark symbols as position independent. We only do this in the
17746 .text segment, not in the .data segment. */
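/* For example (illustrative names): when emitting a PIC constant pool, a
   word referring to a non-local symbol comes out as ".word foo(GOT)",
   while a local symbol or label comes out as ".word .LC0(GOTOFF)".  */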
17747 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
17748 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
17749 {
17750 /* See legitimize_pic_address for an explanation of the
17751 TARGET_VXWORKS_RTP check. */
17752 if (TARGET_VXWORKS_RTP
17753 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
17754 fputs ("(GOT)", asm_out_file);
17755 else
17756 fputs ("(GOTOFF)", asm_out_file);
17757 }
17758 fputc ('\n', asm_out_file);
17759 return true;
17760 }
17761
17762 mode = GET_MODE (x);
17763
17764 if (arm_vector_mode_supported_p (mode))
17765 {
17766 int i, units;
17767
17768 gcc_assert (GET_CODE (x) == CONST_VECTOR);
17769
17770 units = CONST_VECTOR_NUNITS (x);
17771 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
17772
17773 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17774 for (i = 0; i < units; i++)
17775 {
17776 rtx elt = CONST_VECTOR_ELT (x, i);
17777 assemble_integer
17778 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
17779 }
17780 else
17781 for (i = 0; i < units; i++)
17782 {
17783 rtx elt = CONST_VECTOR_ELT (x, i);
17784 REAL_VALUE_TYPE rval;
17785
17786 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
17787
17788 assemble_real
17789 (rval, GET_MODE_INNER (mode),
17790 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
17791 }
17792
17793 return true;
17794 }
17795
17796 return default_assemble_integer (x, size, aligned_p);
17797 }
17798
17799 static void
17800 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
17801 {
17802 section *s;
17803
17804 if (!TARGET_AAPCS_BASED)
17805 {
17806 (is_ctor ?
17807 default_named_section_asm_out_constructor
17808 : default_named_section_asm_out_destructor) (symbol, priority);
17809 return;
17810 }
17811
17812 /* Put these in the .init_array section, using a special relocation. */
17813 if (priority != DEFAULT_INIT_PRIORITY)
17814 {
17815 char buf[18];
17816 sprintf (buf, "%s.%.5u",
17817 is_ctor ? ".init_array" : ".fini_array",
17818 priority);
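/* For example, priority 101 produces the section name
   ".init_array.00101"; the "%.5u" zero-pads to five digits, which keeps
   the section names in numeric order.  */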
17819 s = get_section (buf, SECTION_WRITE, NULL_TREE);
17820 }
17821 else if (is_ctor)
17822 s = ctors_section;
17823 else
17824 s = dtors_section;
17825
17826 switch_to_section (s);
17827 assemble_align (POINTER_SIZE);
17828 fputs ("\t.word\t", asm_out_file);
17829 output_addr_const (asm_out_file, symbol);
17830 fputs ("(target1)\n", asm_out_file);
17831 }
17832
17833 /* Add a function to the list of static constructors. */
17834
17835 static void
17836 arm_elf_asm_constructor (rtx symbol, int priority)
17837 {
17838 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
17839 }
17840
17841 /* Add a function to the list of static destructors. */
17842
17843 static void
17844 arm_elf_asm_destructor (rtx symbol, int priority)
17845 {
17846 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
17847 }
17848 \f
17849 /* A finite state machine takes care of noticing whether or not instructions
17850 can be conditionally executed, thereby decreasing execution time and code
17851 size by deleting branch instructions. The fsm is controlled by
17852 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
17853
17854 /* The states of the fsm controlling condition codes are:
17855 0: normal, do nothing special
17856 1: make ASM_OUTPUT_OPCODE not output this instruction
17857 2: make ASM_OUTPUT_OPCODE not output this instruction
17858 3: make instructions conditional
17859 4: make instructions conditional
17860
17861 State transitions (state->state by whom under condition):
17862 0 -> 1 final_prescan_insn if the `target' is a label
17863 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
17864 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
17865 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
17866 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
17867 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
17868 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
17869 (the target insn is arm_target_insn).
17870
17871 If the jump clobbers the conditions then we use states 2 and 4.
17872
17873 A similar thing can be done with conditional return insns.
17874
17875 XXX In case the `target' is an unconditional branch, this conditionalising
17876 of the instructions always reduces code size, but not always execution
17877 time. But then, I want to reduce the code size to somewhere near what
17878 /bin/cc produces. */
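/* As an illustration (not taken from real compiler output), for a
   sequence such as

        cmp     r0, #0
        beq     .L1
        add     r1, r1, #1
   .L1:

   the beq need not be output; the skipped insn is emitted as
   "addne r1, r1, #1" instead.  */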
17879
17880 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
17881 instructions. When a COND_EXEC instruction is seen the subsequent
17882 instructions are scanned so that multiple conditional instructions can be
17883 combined into a single IT block. arm_condexec_count and arm_condexec_mask
17884 specify the length and true/false mask for the IT block. These will be
17885 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
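/* As a hypothetical example, with arm_current_cc == ARM_EQ,
   arm_condexec_masklen == 3 and arm_condexec_mask == binary 011, the
   block is announced with "itte eq": the first two insns execute if EQ
   holds and the third executes if NE holds.  */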
17886
17887 /* Returns the index of the ARM condition code string in
17888 `arm_condition_codes', or ARM_NV if the comparison is invalid.
17889 COMPARISON should be an rtx like `(eq (...) (...))'. */
17890
17891 enum arm_cond_code
17892 maybe_get_arm_condition_code (rtx comparison)
17893 {
17894 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
17895 enum arm_cond_code code;
17896 enum rtx_code comp_code = GET_CODE (comparison);
17897
17898 if (GET_MODE_CLASS (mode) != MODE_CC)
17899 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
17900 XEXP (comparison, 1));
17901
17902 switch (mode)
17903 {
17904 case CC_DNEmode: code = ARM_NE; goto dominance;
17905 case CC_DEQmode: code = ARM_EQ; goto dominance;
17906 case CC_DGEmode: code = ARM_GE; goto dominance;
17907 case CC_DGTmode: code = ARM_GT; goto dominance;
17908 case CC_DLEmode: code = ARM_LE; goto dominance;
17909 case CC_DLTmode: code = ARM_LT; goto dominance;
17910 case CC_DGEUmode: code = ARM_CS; goto dominance;
17911 case CC_DGTUmode: code = ARM_HI; goto dominance;
17912 case CC_DLEUmode: code = ARM_LS; goto dominance;
17913 case CC_DLTUmode: code = ARM_CC;
17914
17915 dominance:
17916 if (comp_code == EQ)
17917 return ARM_INVERSE_CONDITION_CODE (code);
17918 if (comp_code == NE)
17919 return code;
17920 return ARM_NV;
17921
17922 case CC_NOOVmode:
17923 switch (comp_code)
17924 {
17925 case NE: return ARM_NE;
17926 case EQ: return ARM_EQ;
17927 case GE: return ARM_PL;
17928 case LT: return ARM_MI;
17929 default: return ARM_NV;
17930 }
17931
17932 case CC_Zmode:
17933 switch (comp_code)
17934 {
17935 case NE: return ARM_NE;
17936 case EQ: return ARM_EQ;
17937 default: return ARM_NV;
17938 }
17939
17940 case CC_Nmode:
17941 switch (comp_code)
17942 {
17943 case NE: return ARM_MI;
17944 case EQ: return ARM_PL;
17945 default: return ARM_NV;
17946 }
17947
17948 case CCFPEmode:
17949 case CCFPmode:
17950 /* We can handle all cases except UNEQ and LTGT. */
17951 switch (comp_code)
17952 {
17953 case GE: return ARM_GE;
17954 case GT: return ARM_GT;
17955 case LE: return ARM_LS;
17956 case LT: return ARM_MI;
17957 case NE: return ARM_NE;
17958 case EQ: return ARM_EQ;
17959 case ORDERED: return ARM_VC;
17960 case UNORDERED: return ARM_VS;
17961 case UNLT: return ARM_LT;
17962 case UNLE: return ARM_LE;
17963 case UNGT: return ARM_HI;
17964 case UNGE: return ARM_PL;
17965 /* UNEQ and LTGT do not have a representation. */
17966 case UNEQ: /* Fall through. */
17967 case LTGT: /* Fall through. */
17968 default: return ARM_NV;
17969 }
17970
17971 case CC_SWPmode:
17972 switch (comp_code)
17973 {
17974 case NE: return ARM_NE;
17975 case EQ: return ARM_EQ;
17976 case GE: return ARM_LE;
17977 case GT: return ARM_LT;
17978 case LE: return ARM_GE;
17979 case LT: return ARM_GT;
17980 case GEU: return ARM_LS;
17981 case GTU: return ARM_CC;
17982 case LEU: return ARM_CS;
17983 case LTU: return ARM_HI;
17984 default: return ARM_NV;
17985 }
17986
17987 case CC_Cmode:
17988 switch (comp_code)
17989 {
17990 case LTU: return ARM_CS;
17991 case GEU: return ARM_CC;
17992 default: return ARM_NV;
17993 }
17994
17995 case CC_CZmode:
17996 switch (comp_code)
17997 {
17998 case NE: return ARM_NE;
17999 case EQ: return ARM_EQ;
18000 case GEU: return ARM_CS;
18001 case GTU: return ARM_HI;
18002 case LEU: return ARM_LS;
18003 case LTU: return ARM_CC;
18004 default: return ARM_NV;
18005 }
18006
18007 case CC_NCVmode:
18008 switch (comp_code)
18009 {
18010 case GE: return ARM_GE;
18011 case LT: return ARM_LT;
18012 case GEU: return ARM_CS;
18013 case LTU: return ARM_CC;
18014 default: return ARM_NV;
18015 }
18016
18017 case CCmode:
18018 switch (comp_code)
18019 {
18020 case NE: return ARM_NE;
18021 case EQ: return ARM_EQ;
18022 case GE: return ARM_GE;
18023 case GT: return ARM_GT;
18024 case LE: return ARM_LE;
18025 case LT: return ARM_LT;
18026 case GEU: return ARM_CS;
18027 case GTU: return ARM_HI;
18028 case LEU: return ARM_LS;
18029 case LTU: return ARM_CC;
18030 default: return ARM_NV;
18031 }
18032
18033 default: gcc_unreachable ();
18034 }
18035 }
18036
18037 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
18038 static enum arm_cond_code
18039 get_arm_condition_code (rtx comparison)
18040 {
18041 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
18042 gcc_assert (code != ARM_NV);
18043 return code;
18044 }
18045
18046 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
18047 instructions. */
18048 void
18049 thumb2_final_prescan_insn (rtx insn)
18050 {
18051 rtx first_insn = insn;
18052 rtx body = PATTERN (insn);
18053 rtx predicate;
18054 enum arm_cond_code code;
18055 int n;
18056 int mask;
18057
18058 /* Remove the previous insn from the count of insns to be output. */
18059 if (arm_condexec_count)
18060 arm_condexec_count--;
18061
18062 /* Nothing to do if we are already inside a conditional block. */
18063 if (arm_condexec_count)
18064 return;
18065
18066 if (GET_CODE (body) != COND_EXEC)
18067 return;
18068
18069 /* Conditional jumps are implemented directly. */
18070 if (JUMP_P (insn))
18071 return;
18072
18073 predicate = COND_EXEC_TEST (body);
18074 arm_current_cc = get_arm_condition_code (predicate);
18075
18076 n = get_attr_ce_count (insn);
18077 arm_condexec_count = 1;
18078 arm_condexec_mask = (1 << n) - 1;
18079 arm_condexec_masklen = n;
18080 /* See if subsequent instructions can be combined into the same block. */
18081 for (;;)
18082 {
18083 insn = next_nonnote_insn (insn);
18084
18085 /* Jumping into the middle of an IT block is illegal, so a label or
18086 barrier terminates the block. */
18087 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
18088 break;
18089
18090 body = PATTERN (insn);
18091 /* USE and CLOBBER aren't really insns, so just skip them. */
18092 if (GET_CODE (body) == USE
18093 || GET_CODE (body) == CLOBBER)
18094 continue;
18095
18096 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
18097 if (GET_CODE (body) != COND_EXEC)
18098 break;
18099 /* Allow up to 4 conditionally executed instructions in a block. */
18100 n = get_attr_ce_count (insn);
18101 if (arm_condexec_masklen + n > 4)
18102 break;
18103
18104 predicate = COND_EXEC_TEST (body);
18105 code = get_arm_condition_code (predicate);
18106 mask = (1 << n) - 1;
18107 if (arm_current_cc == code)
18108 arm_condexec_mask |= (mask << arm_condexec_masklen);
18109 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
18110 break;
18111
18112 arm_condexec_count++;
18113 arm_condexec_masklen += n;
18114
18115 /* A jump must be the last instruction in a conditional block. */
18116 if (JUMP_P (insn))
18117 break;
18118 }
18119 /* Restore recog_data (getting the attributes of other insns can
18120 destroy this array, but final.c assumes that it remains intact
18121 across this call). */
18122 extract_constrain_insn_cached (first_insn);
18123 }
18124
18125 void
18126 arm_final_prescan_insn (rtx insn)
18127 {
18128 /* BODY will hold the body of INSN. */
18129 rtx body = PATTERN (insn);
18130
18131 /* This will be 1 if trying to repeat the trick, and things need to be
18132 reversed if it appears to fail. */
18133 int reverse = 0;
18134
18135 /* If we start with a return insn, we only succeed if we find another one. */
18136 int seeking_return = 0;
18137 enum rtx_code return_code = UNKNOWN;
18138
18139 /* START_INSN will hold the insn from where we start looking. This is the
18140 first insn after the following code_label if REVERSE is true. */
18141 rtx start_insn = insn;
18142
18143 /* If in state 4, check if the target branch is reached, in order to
18144 change back to state 0. */
18145 if (arm_ccfsm_state == 4)
18146 {
18147 if (insn == arm_target_insn)
18148 {
18149 arm_target_insn = NULL;
18150 arm_ccfsm_state = 0;
18151 }
18152 return;
18153 }
18154
18155 /* If in state 3, it is possible to repeat the trick, if this insn is an
18156 unconditional branch to a label, and immediately following this branch
18157 is the previous target label which is only used once, and the label this
18158 branch jumps to is not too far off. */
18159 if (arm_ccfsm_state == 3)
18160 {
18161 if (simplejump_p (insn))
18162 {
18163 start_insn = next_nonnote_insn (start_insn);
18164 if (BARRIER_P (start_insn))
18165 {
18166 /* XXX Isn't this always a barrier? */
18167 start_insn = next_nonnote_insn (start_insn);
18168 }
18169 if (LABEL_P (start_insn)
18170 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18171 && LABEL_NUSES (start_insn) == 1)
18172 reverse = TRUE;
18173 else
18174 return;
18175 }
18176 else if (ANY_RETURN_P (body))
18177 {
18178 start_insn = next_nonnote_insn (start_insn);
18179 if (BARRIER_P (start_insn))
18180 start_insn = next_nonnote_insn (start_insn);
18181 if (LABEL_P (start_insn)
18182 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18183 && LABEL_NUSES (start_insn) == 1)
18184 {
18185 reverse = TRUE;
18186 seeking_return = 1;
18187 return_code = GET_CODE (body);
18188 }
18189 else
18190 return;
18191 }
18192 else
18193 return;
18194 }
18195
18196 gcc_assert (!arm_ccfsm_state || reverse);
18197 if (!JUMP_P (insn))
18198 return;
18199
18200 /* This jump might be paralleled with a clobber of the condition codes;
18201 the jump should always come first. */
18202 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
18203 body = XVECEXP (body, 0, 0);
18204
18205 if (reverse
18206 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
18207 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
18208 {
18209 int insns_skipped;
18210 int fail = FALSE, succeed = FALSE;
18211 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
18212 int then_not_else = TRUE;
18213 rtx this_insn = start_insn, label = 0;
18214
18215 /* Register the insn jumped to. */
18216 if (reverse)
18217 {
18218 if (!seeking_return)
18219 label = XEXP (SET_SRC (body), 0);
18220 }
18221 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
18222 label = XEXP (XEXP (SET_SRC (body), 1), 0);
18223 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
18224 {
18225 label = XEXP (XEXP (SET_SRC (body), 2), 0);
18226 then_not_else = FALSE;
18227 }
18228 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
18229 {
18230 seeking_return = 1;
18231 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
18232 }
18233 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
18234 {
18235 seeking_return = 1;
18236 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
18237 then_not_else = FALSE;
18238 }
18239 else
18240 gcc_unreachable ();
18241
18242 /* See how many insns this branch skips, and what kind of insns. If all
18243 insns are okay, and the label or unconditional branch to the same
18244 label is not too far away, succeed. */
18245 for (insns_skipped = 0;
18246 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
18247 {
18248 rtx scanbody;
18249
18250 this_insn = next_nonnote_insn (this_insn);
18251 if (!this_insn)
18252 break;
18253
18254 switch (GET_CODE (this_insn))
18255 {
18256 case CODE_LABEL:
18257 /* Succeed if it is the target label, otherwise fail since
18258 control falls in from somewhere else. */
18259 if (this_insn == label)
18260 {
18261 arm_ccfsm_state = 1;
18262 succeed = TRUE;
18263 }
18264 else
18265 fail = TRUE;
18266 break;
18267
18268 case BARRIER:
18269 /* Succeed if the following insn is the target label.
18270 Otherwise fail.
18271 If return insns are used then the last insn in a function
18272 will be a barrier. */
18273 this_insn = next_nonnote_insn (this_insn);
18274 if (this_insn && this_insn == label)
18275 {
18276 arm_ccfsm_state = 1;
18277 succeed = TRUE;
18278 }
18279 else
18280 fail = TRUE;
18281 break;
18282
18283 case CALL_INSN:
18284 /* The AAPCS says that conditional calls should not be
18285 used since they make interworking inefficient (the
18286 linker can't transform BL<cond> into BLX). That's
18287 only a problem if the machine has BLX. */
18288 if (arm_arch5)
18289 {
18290 fail = TRUE;
18291 break;
18292 }
18293
18294 /* Succeed if the following insn is the target label, or
18295 if the following two insns are a barrier and the
18296 target label. */
18297 this_insn = next_nonnote_insn (this_insn);
18298 if (this_insn && BARRIER_P (this_insn))
18299 this_insn = next_nonnote_insn (this_insn);
18300
18301 if (this_insn && this_insn == label
18302 && insns_skipped < max_insns_skipped)
18303 {
18304 arm_ccfsm_state = 1;
18305 succeed = TRUE;
18306 }
18307 else
18308 fail = TRUE;
18309 break;
18310
18311 case JUMP_INSN:
18312 /* If this is an unconditional branch to the same label, succeed.
18313 If it is to another label, do nothing. If it is conditional,
18314 fail. */
18315 /* XXX Probably, the tests for SET and the PC are
18316 unnecessary. */
18317
18318 scanbody = PATTERN (this_insn);
18319 if (GET_CODE (scanbody) == SET
18320 && GET_CODE (SET_DEST (scanbody)) == PC)
18321 {
18322 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
18323 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
18324 {
18325 arm_ccfsm_state = 2;
18326 succeed = TRUE;
18327 }
18328 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
18329 fail = TRUE;
18330 }
18331 /* Fail if a conditional return is undesirable (e.g. on a
18332 StrongARM), but still allow this if optimizing for size. */
18333 else if (GET_CODE (scanbody) == return_code
18334 && !use_return_insn (TRUE, NULL)
18335 && !optimize_size)
18336 fail = TRUE;
18337 else if (GET_CODE (scanbody) == return_code)
18338 {
18339 arm_ccfsm_state = 2;
18340 succeed = TRUE;
18341 }
18342 else if (GET_CODE (scanbody) == PARALLEL)
18343 {
18344 switch (get_attr_conds (this_insn))
18345 {
18346 case CONDS_NOCOND:
18347 break;
18348 default:
18349 fail = TRUE;
18350 break;
18351 }
18352 }
18353 else
18354 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
18355
18356 break;
18357
18358 case INSN:
18359 /* Instructions using or affecting the condition codes make it
18360 fail. */
18361 scanbody = PATTERN (this_insn);
18362 if (!(GET_CODE (scanbody) == SET
18363 || GET_CODE (scanbody) == PARALLEL)
18364 || get_attr_conds (this_insn) != CONDS_NOCOND)
18365 fail = TRUE;
18366 break;
18367
18368 default:
18369 break;
18370 }
18371 }
18372 if (succeed)
18373 {
18374 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
18375 arm_target_label = CODE_LABEL_NUMBER (label);
18376 else
18377 {
18378 gcc_assert (seeking_return || arm_ccfsm_state == 2);
18379
18380 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
18381 {
18382 this_insn = next_nonnote_insn (this_insn);
18383 gcc_assert (!this_insn
18384 || (!BARRIER_P (this_insn)
18385 && !LABEL_P (this_insn)));
18386 }
18387 if (!this_insn)
18388 {
18389 /* Oh dear!  We ran off the end; give up. */
18390 extract_constrain_insn_cached (insn);
18391 arm_ccfsm_state = 0;
18392 arm_target_insn = NULL;
18393 return;
18394 }
18395 arm_target_insn = this_insn;
18396 }
18397
18398 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
18399 what it was. */
18400 if (!reverse)
18401 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
18402
18403 if (reverse || then_not_else)
18404 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
18405 }
18406
18407 /* Restore recog_data (getting the attributes of other insns can
18408 destroy this array, but final.c assumes that it remains intact
18409 across this call). */
18410 extract_constrain_insn_cached (insn);
18411 }
18412 }
18413
18414 /* Output IT instructions. */
18415 void
18416 thumb2_asm_output_opcode (FILE * stream)
18417 {
18418 char buff[5];
18419 int n;
18420
18421 if (arm_condexec_mask)
18422 {
18423 for (n = 0; n < arm_condexec_masklen; n++)
18424 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
18425 buff[n] = 0;
18426 asm_fprintf (stream, "i%s\t%s\n\t", buff,
18427 arm_condition_codes[arm_current_cc]);
18428 arm_condexec_mask = 0;
18429 }
18430 }
18431
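/* Illustrative example (derived from the loop above, not separate code):
   with arm_condexec_mask == 0x3, arm_condexec_masklen == 2 and the
   current condition EQ, buff becomes "tt" and the prefix emitted in
   front of the next opcode is

       itt     eq

   i.e. one 't' or 'e' per remaining instruction in the IT block, taken
   from the low bits of the mask.  */
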
18432 /* Returns true if REGNO is a valid register
18433 for holding a quantity of mode MODE. */
18434 int
18435 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
18436 {
18437 if (GET_MODE_CLASS (mode) == MODE_CC)
18438 return (regno == CC_REGNUM
18439 || (TARGET_HARD_FLOAT && TARGET_VFP
18440 && regno == VFPCC_REGNUM));
18441
18442 if (TARGET_THUMB1)
18443 /* For the Thumb we only allow values bigger than SImode in
18444 registers 0 - 6, so that there is always a second low
18445 register available to hold the upper part of the value.
18446 We probably ought to ensure that the register is the
18447 start of an even numbered register pair. */
18448 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
18449
18450 if (TARGET_HARD_FLOAT && TARGET_VFP
18451 && IS_VFP_REGNUM (regno))
18452 {
18453 if (mode == SFmode || mode == SImode)
18454 return VFP_REGNO_OK_FOR_SINGLE (regno);
18455
18456 if (mode == DFmode)
18457 return VFP_REGNO_OK_FOR_DOUBLE (regno);
18458
18459 /* VFP registers can hold HFmode values, but there is no point in
18460 putting them there unless we have hardware conversion insns. */
18461 if (mode == HFmode)
18462 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
18463
18464 if (TARGET_NEON)
18465 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
18466 || (VALID_NEON_QREG_MODE (mode)
18467 && NEON_REGNO_OK_FOR_QUAD (regno))
18468 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
18469 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
18470 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
18471 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
18472 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
18473
18474 return FALSE;
18475 }
18476
18477 if (TARGET_REALLY_IWMMXT)
18478 {
18479 if (IS_IWMMXT_GR_REGNUM (regno))
18480 return mode == SImode;
18481
18482 if (IS_IWMMXT_REGNUM (regno))
18483 return VALID_IWMMXT_REG_MODE (mode);
18484 }
18485
18486 /* We allow almost any value to be stored in the general registers.
18487 Restrict doubleword quantities to even register pairs so that we can
18488 use ldrd. Do not allow very large Neon structure opaque modes in
18489 general registers; they would use too many. */
18490 if (regno <= LAST_ARM_REGNUM)
18491 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
18492 && ARM_NUM_REGS (mode) <= 4;
18493
18494 if (regno == FRAME_POINTER_REGNUM
18495 || regno == ARG_POINTER_REGNUM)
18496 /* We only allow integers in the fake hard registers. */
18497 return GET_MODE_CLASS (mode) == MODE_INT;
18498
18499 return FALSE;
18500 }
18501
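/* Worked example of the core-register rule above (illustrative only,
   assuming regnos 0 and 1 are plain core registers and TARGET_LDRD is
   true):

     arm_hard_regno_mode_ok (0, DImode) != 0   -- even base reg, ldrd-able
     arm_hard_regno_mode_ok (1, DImode) == 0   -- odd base reg rejected
     arm_hard_regno_mode_ok (0, OImode) == 0   -- would need 8 core regs  */
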
18502 /* Implement MODES_TIEABLE_P. */
18503
18504 bool
18505 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18506 {
18507 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
18508 return true;
18509
18510 /* We specifically want to allow elements of "structure" modes to
18511 be tieable to the structure. This more general condition allows
18512 other rarer situations too. */
18513 if (TARGET_NEON
18514 && (VALID_NEON_DREG_MODE (mode1)
18515 || VALID_NEON_QREG_MODE (mode1)
18516 || VALID_NEON_STRUCT_MODE (mode1))
18517 && (VALID_NEON_DREG_MODE (mode2)
18518 || VALID_NEON_QREG_MODE (mode2)
18519 || VALID_NEON_STRUCT_MODE (mode2)))
18520 return true;
18521
18522 return false;
18523 }
18524
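/* For instance, under TARGET_NEON a V4SImode vector is tieable to an
   OImode value even though the two modes are in different mode classes;
   this is the "element of a structure mode" case mentioned above
   (assuming OImode satisfies VALID_NEON_STRUCT_MODE).  */
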
18525 /* For efficiency and historical reasons, LO_REGS, HI_REGS and CC_REGS are
18526 not used in ARM mode. */
18527
18528 enum reg_class
18529 arm_regno_class (int regno)
18530 {
18531 if (TARGET_THUMB1)
18532 {
18533 if (regno == STACK_POINTER_REGNUM)
18534 return STACK_REG;
18535 if (regno == CC_REGNUM)
18536 return CC_REG;
18537 if (regno < 8)
18538 return LO_REGS;
18539 return HI_REGS;
18540 }
18541
18542 if (TARGET_THUMB2 && regno < 8)
18543 return LO_REGS;
18544
18545 if ( regno <= LAST_ARM_REGNUM
18546 || regno == FRAME_POINTER_REGNUM
18547 || regno == ARG_POINTER_REGNUM)
18548 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
18549
18550 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
18551 return TARGET_THUMB2 ? CC_REG : NO_REGS;
18552
18553 if (IS_VFP_REGNUM (regno))
18554 {
18555 if (regno <= D7_VFP_REGNUM)
18556 return VFP_D0_D7_REGS;
18557 else if (regno <= LAST_LO_VFP_REGNUM)
18558 return VFP_LO_REGS;
18559 else
18560 return VFP_HI_REGS;
18561 }
18562
18563 if (IS_IWMMXT_REGNUM (regno))
18564 return IWMMXT_REGS;
18565
18566 if (IS_IWMMXT_GR_REGNUM (regno))
18567 return IWMMXT_GR_REGS;
18568
18569 return NO_REGS;
18570 }
18571
18572 /* Handle a special case when computing the offset
18573 of an argument from the frame pointer. */
18574 int
18575 arm_debugger_arg_offset (int value, rtx addr)
18576 {
18577 rtx insn;
18578
18579 /* We are only interested in cases where dbxout_parms() failed to compute the offset. */
18580 if (value != 0)
18581 return 0;
18582
18583 /* We can only cope with the case where the address is held in a register. */
18584 if (!REG_P (addr))
18585 return 0;
18586
18587 /* If we are using the frame pointer to point at the argument, then
18588 an offset of 0 is correct. */
18589 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
18590 return 0;
18591
18592 /* If we are using the stack pointer to point at the
18593 argument, then an offset of 0 is correct. */
18594 /* ??? Check this is consistent with thumb2 frame layout. */
18595 if ((TARGET_THUMB || !frame_pointer_needed)
18596 && REGNO (addr) == SP_REGNUM)
18597 return 0;
18598
18599 /* Oh dear.  The argument is pointed to by a register, rather
18600 than being held in a register or stored at a known
18601 offset from the frame pointer.  Since GDB only understands
18602 those two kinds of argument we must translate the address
18603 held in the register into an offset from the frame pointer.
18604 We do this by searching through the insns for the function
18605 looking to see where this register gets its value. If the
18606 register is initialized from the frame pointer plus an offset
18607 then we are in luck and we can continue, otherwise we give up.
18608
18609 This code is exercised by producing debugging information
18610 for a function with arguments like this:
18611
18612 double func (double a, double b, int c, double d) {return d;}
18613
18614 Without this code the stab for parameter 'd' will be set to
18615 an offset of 0 from the frame pointer, rather than 8. */
18616
18617 /* The if() statement says:
18618
18619 If the insn is a normal instruction
18620 and if the insn is setting the value in a register
18621 and if the register being set is the register holding the address of the argument
18622 and if the address is computed by an addition
18623 that involves adding to a register
18624 which is the frame pointer
18625 a constant integer
18626
18627 then... */
18628
18629 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18630 {
18631 if ( NONJUMP_INSN_P (insn)
18632 && GET_CODE (PATTERN (insn)) == SET
18633 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
18634 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
18635 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
18636 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
18637 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
18638 )
18639 {
18640 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
18641
18642 break;
18643 }
18644 }
18645
18646 if (value == 0)
18647 {
18648 debug_rtx (addr);
18649 warning (0, "unable to compute real location of stacked parameter");
18650 value = 8; /* XXX magic hack */
18651 }
18652
18653 return value;
18654 }
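
/* For reference (illustrative only): the insn shape the loop above
   searches for is

     (set (reg Rn)
          (plus (reg HARD_FRAME_POINTER_REGNUM)
                (const_int OFFSET)))

   where Rn is the register holding the argument's address; OFFSET is
   then the value handed back to the debug output machinery.  */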
18655 \f
18656 typedef enum {
18657 T_V8QI,
18658 T_V4HI,
18659 T_V2SI,
18660 T_V2SF,
18661 T_DI,
18662 T_V16QI,
18663 T_V8HI,
18664 T_V4SI,
18665 T_V4SF,
18666 T_V2DI,
18667 T_TI,
18668 T_EI,
18669 T_OI,
18670 T_MAX /* Size of enum. Keep last. */
18671 } neon_builtin_type_mode;
18672
18673 #define TYPE_MODE_BIT(X) (1 << (X))
18674
18675 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
18676 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
18677 | TYPE_MODE_BIT (T_DI))
18678 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
18679 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
18680 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
18681
18682 #define v8qi_UP T_V8QI
18683 #define v4hi_UP T_V4HI
18684 #define v2si_UP T_V2SI
18685 #define v2sf_UP T_V2SF
18686 #define di_UP T_DI
18687 #define v16qi_UP T_V16QI
18688 #define v8hi_UP T_V8HI
18689 #define v4si_UP T_V4SI
18690 #define v4sf_UP T_V4SF
18691 #define v2di_UP T_V2DI
18692 #define ti_UP T_TI
18693 #define ei_UP T_EI
18694 #define oi_UP T_OI
18695
18696 #define UP(X) X##_UP
18697
18698 typedef enum {
18699 NEON_BINOP,
18700 NEON_TERNOP,
18701 NEON_UNOP,
18702 NEON_GETLANE,
18703 NEON_SETLANE,
18704 NEON_CREATE,
18705 NEON_DUP,
18706 NEON_DUPLANE,
18707 NEON_COMBINE,
18708 NEON_SPLIT,
18709 NEON_LANEMUL,
18710 NEON_LANEMULL,
18711 NEON_LANEMULH,
18712 NEON_LANEMAC,
18713 NEON_SCALARMUL,
18714 NEON_SCALARMULL,
18715 NEON_SCALARMULH,
18716 NEON_SCALARMAC,
18717 NEON_CONVERT,
18718 NEON_FIXCONV,
18719 NEON_SELECT,
18720 NEON_RESULTPAIR,
18721 NEON_REINTERP,
18722 NEON_VTBL,
18723 NEON_VTBX,
18724 NEON_LOAD1,
18725 NEON_LOAD1LANE,
18726 NEON_STORE1,
18727 NEON_STORE1LANE,
18728 NEON_LOADSTRUCT,
18729 NEON_LOADSTRUCTLANE,
18730 NEON_STORESTRUCT,
18731 NEON_STORESTRUCTLANE,
18732 NEON_LOGICBINOP,
18733 NEON_SHIFTINSERT,
18734 NEON_SHIFTIMM,
18735 NEON_SHIFTACC
18736 } neon_itype;
18737
18738 typedef struct {
18739 const char *name;
18740 const neon_itype itype;
18741 const neon_builtin_type_mode mode;
18742 const enum insn_code code;
18743 unsigned int fcode;
18744 } neon_builtin_datum;
18745
18746 #define CF(N,X) CODE_FOR_neon_##N##X
18747
18748 #define VAR1(T, N, A) \
18749 {#N, NEON_##T, UP (A), CF (N, A), 0}
18750 #define VAR2(T, N, A, B) \
18751 VAR1 (T, N, A), \
18752 {#N, NEON_##T, UP (B), CF (N, B), 0}
18753 #define VAR3(T, N, A, B, C) \
18754 VAR2 (T, N, A, B), \
18755 {#N, NEON_##T, UP (C), CF (N, C), 0}
18756 #define VAR4(T, N, A, B, C, D) \
18757 VAR3 (T, N, A, B, C), \
18758 {#N, NEON_##T, UP (D), CF (N, D), 0}
18759 #define VAR5(T, N, A, B, C, D, E) \
18760 VAR4 (T, N, A, B, C, D), \
18761 {#N, NEON_##T, UP (E), CF (N, E), 0}
18762 #define VAR6(T, N, A, B, C, D, E, F) \
18763 VAR5 (T, N, A, B, C, D, E), \
18764 {#N, NEON_##T, UP (F), CF (N, F), 0}
18765 #define VAR7(T, N, A, B, C, D, E, F, G) \
18766 VAR6 (T, N, A, B, C, D, E, F), \
18767 {#N, NEON_##T, UP (G), CF (N, G), 0}
18768 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
18769 VAR7 (T, N, A, B, C, D, E, F, G), \
18770 {#N, NEON_##T, UP (H), CF (N, H), 0}
18771 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
18772 VAR8 (T, N, A, B, C, D, E, F, G, H), \
18773 {#N, NEON_##T, UP (I), CF (N, I), 0}
18774 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
18775 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
18776 {#N, NEON_##T, UP (J), CF (N, J), 0}
18777
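/* As an illustration of the macros above (the expansion is shown for
   reference only): the table entry

     VAR2 (TERNOP, vfma, v2sf, v4sf)

   expands to the two initializers

     {"vfma", NEON_TERNOP, T_V2SF, CODE_FOR_neon_vfmav2sf, 0},
     {"vfma", NEON_TERNOP, T_V4SF, CODE_FOR_neon_vfmav4sf, 0}

   i.e. one neon_builtin_datum per "key" mode listed for the intrinsic.  */
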
18778 /* The mode entries in the following table correspond to the "key" type of the
18779 instruction variant, i.e. equivalent to that which would be specified after
18780 the assembler mnemonic, which usually refers to the last vector operand.
18781 (Signed/unsigned/polynomial types are not differentiated, though; they
18782 are all mapped onto the same mode for a given element size.) The modes
18783 listed per instruction should be the same as those defined for that
18784 instruction's pattern in neon.md. */
18785
18786 static neon_builtin_datum neon_builtin_data[] =
18787 {
18788 VAR10 (BINOP, vadd,
18789 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18790 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
18791 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
18792 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18793 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18794 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
18795 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18796 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18797 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
18798 VAR2 (TERNOP, vfma, v2sf, v4sf),
18799 VAR2 (TERNOP, vfms, v2sf, v4sf),
18800 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18801 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
18802 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
18803 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
18804 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
18805 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
18806 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
18807 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
18808 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
18809 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
18810 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
18811 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
18812 VAR2 (BINOP, vqdmull, v4hi, v2si),
18813 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18814 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18815 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18816 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
18817 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
18818 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
18819 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18820 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18821 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18822 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
18823 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18824 VAR10 (BINOP, vsub,
18825 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18826 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
18827 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
18828 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18829 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18830 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
18831 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18832 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18833 VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18834 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18835 VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18836 VAR2 (BINOP, vcage, v2sf, v4sf),
18837 VAR2 (BINOP, vcagt, v2sf, v4sf),
18838 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18839 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18840 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
18841 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18842 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
18843 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18844 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18845 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
18846 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18847 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18848 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
18849 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
18850 VAR2 (BINOP, vrecps, v2sf, v4sf),
18851 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
18852 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18853 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18854 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18855 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18856 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18857 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18858 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18859 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18860 VAR2 (UNOP, vcnt, v8qi, v16qi),
18861 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
18862 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
18863 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18864 /* FIXME: vget_lane supports more variants than this! */
18865 VAR10 (GETLANE, vget_lane,
18866 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18867 VAR10 (SETLANE, vset_lane,
18868 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18869 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
18870 VAR10 (DUP, vdup_n,
18871 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18872 VAR10 (DUPLANE, vdup_lane,
18873 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18874 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
18875 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
18876 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
18877 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
18878 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
18879 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
18880 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
18881 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18882 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18883 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
18884 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
18885 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18886 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
18887 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
18888 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18889 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18890 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
18891 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
18892 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18893 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
18894 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
18895 VAR10 (BINOP, vext,
18896 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18897 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18898 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
18899 VAR2 (UNOP, vrev16, v8qi, v16qi),
18900 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
18901 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
18902 VAR10 (SELECT, vbsl,
18903 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18904 VAR1 (VTBL, vtbl1, v8qi),
18905 VAR1 (VTBL, vtbl2, v8qi),
18906 VAR1 (VTBL, vtbl3, v8qi),
18907 VAR1 (VTBL, vtbl4, v8qi),
18908 VAR1 (VTBX, vtbx1, v8qi),
18909 VAR1 (VTBX, vtbx2, v8qi),
18910 VAR1 (VTBX, vtbx3, v8qi),
18911 VAR1 (VTBX, vtbx4, v8qi),
18912 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18913 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18914 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18915 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
18916 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
18917 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
18918 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
18919 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
18920 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
18921 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
18922 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
18923 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
18924 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
18925 VAR10 (LOAD1, vld1,
18926 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18927 VAR10 (LOAD1LANE, vld1_lane,
18928 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18929 VAR10 (LOAD1, vld1_dup,
18930 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18931 VAR10 (STORE1, vst1,
18932 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18933 VAR10 (STORE1LANE, vst1_lane,
18934 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18935 VAR9 (LOADSTRUCT,
18936 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18937 VAR7 (LOADSTRUCTLANE, vld2_lane,
18938 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18939 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
18940 VAR9 (STORESTRUCT, vst2,
18941 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18942 VAR7 (STORESTRUCTLANE, vst2_lane,
18943 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18944 VAR9 (LOADSTRUCT,
18945 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18946 VAR7 (LOADSTRUCTLANE, vld3_lane,
18947 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18948 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
18949 VAR9 (STORESTRUCT, vst3,
18950 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18951 VAR7 (STORESTRUCTLANE, vst3_lane,
18952 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18953 VAR9 (LOADSTRUCT, vld4,
18954 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18955 VAR7 (LOADSTRUCTLANE, vld4_lane,
18956 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18957 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
18958 VAR9 (STORESTRUCT, vst4,
18959 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18960 VAR7 (STORESTRUCTLANE, vst4_lane,
18961 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18962 VAR10 (LOGICBINOP, vand,
18963 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18964 VAR10 (LOGICBINOP, vorr,
18965 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18966 VAR10 (BINOP, veor,
18967 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18968 VAR10 (LOGICBINOP, vbic,
18969 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18970 VAR10 (LOGICBINOP, vorn,
18971 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
18972 };
18973
18974 #undef CF
18975 #undef VAR1
18976 #undef VAR2
18977 #undef VAR3
18978 #undef VAR4
18979 #undef VAR5
18980 #undef VAR6
18981 #undef VAR7
18982 #undef VAR8
18983 #undef VAR9
18984 #undef VAR10
18985
18986 /* Neon defines builtins from ARM_BUILTIN_NEON_BASE upwards, though they don't have
18987 symbolic names defined here (which would require too much duplication).
18988 FIXME? */
18989 enum arm_builtins
18990 {
18991 ARM_BUILTIN_GETWCGR0,
18992 ARM_BUILTIN_GETWCGR1,
18993 ARM_BUILTIN_GETWCGR2,
18994 ARM_BUILTIN_GETWCGR3,
18995
18996 ARM_BUILTIN_SETWCGR0,
18997 ARM_BUILTIN_SETWCGR1,
18998 ARM_BUILTIN_SETWCGR2,
18999 ARM_BUILTIN_SETWCGR3,
19000
19001 ARM_BUILTIN_WZERO,
19002
19003 ARM_BUILTIN_WAVG2BR,
19004 ARM_BUILTIN_WAVG2HR,
19005 ARM_BUILTIN_WAVG2B,
19006 ARM_BUILTIN_WAVG2H,
19007
19008 ARM_BUILTIN_WACCB,
19009 ARM_BUILTIN_WACCH,
19010 ARM_BUILTIN_WACCW,
19011
19012 ARM_BUILTIN_WMACS,
19013 ARM_BUILTIN_WMACSZ,
19014 ARM_BUILTIN_WMACU,
19015 ARM_BUILTIN_WMACUZ,
19016
19017 ARM_BUILTIN_WSADB,
19018 ARM_BUILTIN_WSADBZ,
19019 ARM_BUILTIN_WSADH,
19020 ARM_BUILTIN_WSADHZ,
19021
19022 ARM_BUILTIN_WALIGNI,
19023 ARM_BUILTIN_WALIGNR0,
19024 ARM_BUILTIN_WALIGNR1,
19025 ARM_BUILTIN_WALIGNR2,
19026 ARM_BUILTIN_WALIGNR3,
19027
19028 ARM_BUILTIN_TMIA,
19029 ARM_BUILTIN_TMIAPH,
19030 ARM_BUILTIN_TMIABB,
19031 ARM_BUILTIN_TMIABT,
19032 ARM_BUILTIN_TMIATB,
19033 ARM_BUILTIN_TMIATT,
19034
19035 ARM_BUILTIN_TMOVMSKB,
19036 ARM_BUILTIN_TMOVMSKH,
19037 ARM_BUILTIN_TMOVMSKW,
19038
19039 ARM_BUILTIN_TBCSTB,
19040 ARM_BUILTIN_TBCSTH,
19041 ARM_BUILTIN_TBCSTW,
19042
19043 ARM_BUILTIN_WMADDS,
19044 ARM_BUILTIN_WMADDU,
19045
19046 ARM_BUILTIN_WPACKHSS,
19047 ARM_BUILTIN_WPACKWSS,
19048 ARM_BUILTIN_WPACKDSS,
19049 ARM_BUILTIN_WPACKHUS,
19050 ARM_BUILTIN_WPACKWUS,
19051 ARM_BUILTIN_WPACKDUS,
19052
19053 ARM_BUILTIN_WADDB,
19054 ARM_BUILTIN_WADDH,
19055 ARM_BUILTIN_WADDW,
19056 ARM_BUILTIN_WADDSSB,
19057 ARM_BUILTIN_WADDSSH,
19058 ARM_BUILTIN_WADDSSW,
19059 ARM_BUILTIN_WADDUSB,
19060 ARM_BUILTIN_WADDUSH,
19061 ARM_BUILTIN_WADDUSW,
19062 ARM_BUILTIN_WSUBB,
19063 ARM_BUILTIN_WSUBH,
19064 ARM_BUILTIN_WSUBW,
19065 ARM_BUILTIN_WSUBSSB,
19066 ARM_BUILTIN_WSUBSSH,
19067 ARM_BUILTIN_WSUBSSW,
19068 ARM_BUILTIN_WSUBUSB,
19069 ARM_BUILTIN_WSUBUSH,
19070 ARM_BUILTIN_WSUBUSW,
19071
19072 ARM_BUILTIN_WAND,
19073 ARM_BUILTIN_WANDN,
19074 ARM_BUILTIN_WOR,
19075 ARM_BUILTIN_WXOR,
19076
19077 ARM_BUILTIN_WCMPEQB,
19078 ARM_BUILTIN_WCMPEQH,
19079 ARM_BUILTIN_WCMPEQW,
19080 ARM_BUILTIN_WCMPGTUB,
19081 ARM_BUILTIN_WCMPGTUH,
19082 ARM_BUILTIN_WCMPGTUW,
19083 ARM_BUILTIN_WCMPGTSB,
19084 ARM_BUILTIN_WCMPGTSH,
19085 ARM_BUILTIN_WCMPGTSW,
19086
19087 ARM_BUILTIN_TEXTRMSB,
19088 ARM_BUILTIN_TEXTRMSH,
19089 ARM_BUILTIN_TEXTRMSW,
19090 ARM_BUILTIN_TEXTRMUB,
19091 ARM_BUILTIN_TEXTRMUH,
19092 ARM_BUILTIN_TEXTRMUW,
19093 ARM_BUILTIN_TINSRB,
19094 ARM_BUILTIN_TINSRH,
19095 ARM_BUILTIN_TINSRW,
19096
19097 ARM_BUILTIN_WMAXSW,
19098 ARM_BUILTIN_WMAXSH,
19099 ARM_BUILTIN_WMAXSB,
19100 ARM_BUILTIN_WMAXUW,
19101 ARM_BUILTIN_WMAXUH,
19102 ARM_BUILTIN_WMAXUB,
19103 ARM_BUILTIN_WMINSW,
19104 ARM_BUILTIN_WMINSH,
19105 ARM_BUILTIN_WMINSB,
19106 ARM_BUILTIN_WMINUW,
19107 ARM_BUILTIN_WMINUH,
19108 ARM_BUILTIN_WMINUB,
19109
19110 ARM_BUILTIN_WMULUM,
19111 ARM_BUILTIN_WMULSM,
19112 ARM_BUILTIN_WMULUL,
19113
19114 ARM_BUILTIN_PSADBH,
19115 ARM_BUILTIN_WSHUFH,
19116
19117 ARM_BUILTIN_WSLLH,
19118 ARM_BUILTIN_WSLLW,
19119 ARM_BUILTIN_WSLLD,
19120 ARM_BUILTIN_WSRAH,
19121 ARM_BUILTIN_WSRAW,
19122 ARM_BUILTIN_WSRAD,
19123 ARM_BUILTIN_WSRLH,
19124 ARM_BUILTIN_WSRLW,
19125 ARM_BUILTIN_WSRLD,
19126 ARM_BUILTIN_WRORH,
19127 ARM_BUILTIN_WRORW,
19128 ARM_BUILTIN_WRORD,
19129 ARM_BUILTIN_WSLLHI,
19130 ARM_BUILTIN_WSLLWI,
19131 ARM_BUILTIN_WSLLDI,
19132 ARM_BUILTIN_WSRAHI,
19133 ARM_BUILTIN_WSRAWI,
19134 ARM_BUILTIN_WSRADI,
19135 ARM_BUILTIN_WSRLHI,
19136 ARM_BUILTIN_WSRLWI,
19137 ARM_BUILTIN_WSRLDI,
19138 ARM_BUILTIN_WRORHI,
19139 ARM_BUILTIN_WRORWI,
19140 ARM_BUILTIN_WRORDI,
19141
19142 ARM_BUILTIN_WUNPCKIHB,
19143 ARM_BUILTIN_WUNPCKIHH,
19144 ARM_BUILTIN_WUNPCKIHW,
19145 ARM_BUILTIN_WUNPCKILB,
19146 ARM_BUILTIN_WUNPCKILH,
19147 ARM_BUILTIN_WUNPCKILW,
19148
19149 ARM_BUILTIN_WUNPCKEHSB,
19150 ARM_BUILTIN_WUNPCKEHSH,
19151 ARM_BUILTIN_WUNPCKEHSW,
19152 ARM_BUILTIN_WUNPCKEHUB,
19153 ARM_BUILTIN_WUNPCKEHUH,
19154 ARM_BUILTIN_WUNPCKEHUW,
19155 ARM_BUILTIN_WUNPCKELSB,
19156 ARM_BUILTIN_WUNPCKELSH,
19157 ARM_BUILTIN_WUNPCKELSW,
19158 ARM_BUILTIN_WUNPCKELUB,
19159 ARM_BUILTIN_WUNPCKELUH,
19160 ARM_BUILTIN_WUNPCKELUW,
19161
19162 ARM_BUILTIN_WABSB,
19163 ARM_BUILTIN_WABSH,
19164 ARM_BUILTIN_WABSW,
19165
19166 ARM_BUILTIN_WADDSUBHX,
19167 ARM_BUILTIN_WSUBADDHX,
19168
19169 ARM_BUILTIN_WABSDIFFB,
19170 ARM_BUILTIN_WABSDIFFH,
19171 ARM_BUILTIN_WABSDIFFW,
19172
19173 ARM_BUILTIN_WADDCH,
19174 ARM_BUILTIN_WADDCW,
19175
19176 ARM_BUILTIN_WAVG4,
19177 ARM_BUILTIN_WAVG4R,
19178
19179 ARM_BUILTIN_WMADDSX,
19180 ARM_BUILTIN_WMADDUX,
19181
19182 ARM_BUILTIN_WMADDSN,
19183 ARM_BUILTIN_WMADDUN,
19184
19185 ARM_BUILTIN_WMULWSM,
19186 ARM_BUILTIN_WMULWUM,
19187
19188 ARM_BUILTIN_WMULWSMR,
19189 ARM_BUILTIN_WMULWUMR,
19190
19191 ARM_BUILTIN_WMULWL,
19192
19193 ARM_BUILTIN_WMULSMR,
19194 ARM_BUILTIN_WMULUMR,
19195
19196 ARM_BUILTIN_WQMULM,
19197 ARM_BUILTIN_WQMULMR,
19198
19199 ARM_BUILTIN_WQMULWM,
19200 ARM_BUILTIN_WQMULWMR,
19201
19202 ARM_BUILTIN_WADDBHUSM,
19203 ARM_BUILTIN_WADDBHUSL,
19204
19205 ARM_BUILTIN_WQMIABB,
19206 ARM_BUILTIN_WQMIABT,
19207 ARM_BUILTIN_WQMIATB,
19208 ARM_BUILTIN_WQMIATT,
19209
19210 ARM_BUILTIN_WQMIABBN,
19211 ARM_BUILTIN_WQMIABTN,
19212 ARM_BUILTIN_WQMIATBN,
19213 ARM_BUILTIN_WQMIATTN,
19214
19215 ARM_BUILTIN_WMIABB,
19216 ARM_BUILTIN_WMIABT,
19217 ARM_BUILTIN_WMIATB,
19218 ARM_BUILTIN_WMIATT,
19219
19220 ARM_BUILTIN_WMIABBN,
19221 ARM_BUILTIN_WMIABTN,
19222 ARM_BUILTIN_WMIATBN,
19223 ARM_BUILTIN_WMIATTN,
19224
19225 ARM_BUILTIN_WMIAWBB,
19226 ARM_BUILTIN_WMIAWBT,
19227 ARM_BUILTIN_WMIAWTB,
19228 ARM_BUILTIN_WMIAWTT,
19229
19230 ARM_BUILTIN_WMIAWBBN,
19231 ARM_BUILTIN_WMIAWBTN,
19232 ARM_BUILTIN_WMIAWTBN,
19233 ARM_BUILTIN_WMIAWTTN,
19234
19235 ARM_BUILTIN_WMERGE,
19236
19237 ARM_BUILTIN_NEON_BASE,
19238
19239 ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
19240 };
19241
19242 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
19243
19244 static void
19245 arm_init_neon_builtins (void)
19246 {
19247 unsigned int i, fcode;
19248 tree decl;
19249
19250 tree neon_intQI_type_node;
19251 tree neon_intHI_type_node;
19252 tree neon_polyQI_type_node;
19253 tree neon_polyHI_type_node;
19254 tree neon_intSI_type_node;
19255 tree neon_intDI_type_node;
19256 tree neon_float_type_node;
19257
19258 tree intQI_pointer_node;
19259 tree intHI_pointer_node;
19260 tree intSI_pointer_node;
19261 tree intDI_pointer_node;
19262 tree float_pointer_node;
19263
19264 tree const_intQI_node;
19265 tree const_intHI_node;
19266 tree const_intSI_node;
19267 tree const_intDI_node;
19268 tree const_float_node;
19269
19270 tree const_intQI_pointer_node;
19271 tree const_intHI_pointer_node;
19272 tree const_intSI_pointer_node;
19273 tree const_intDI_pointer_node;
19274 tree const_float_pointer_node;
19275
19276 tree V8QI_type_node;
19277 tree V4HI_type_node;
19278 tree V2SI_type_node;
19279 tree V2SF_type_node;
19280 tree V16QI_type_node;
19281 tree V8HI_type_node;
19282 tree V4SI_type_node;
19283 tree V4SF_type_node;
19284 tree V2DI_type_node;
19285
19286 tree intUQI_type_node;
19287 tree intUHI_type_node;
19288 tree intUSI_type_node;
19289 tree intUDI_type_node;
19290
19291 tree intEI_type_node;
19292 tree intOI_type_node;
19293 tree intCI_type_node;
19294 tree intXI_type_node;
19295
19296 tree V8QI_pointer_node;
19297 tree V4HI_pointer_node;
19298 tree V2SI_pointer_node;
19299 tree V2SF_pointer_node;
19300 tree V16QI_pointer_node;
19301 tree V8HI_pointer_node;
19302 tree V4SI_pointer_node;
19303 tree V4SF_pointer_node;
19304 tree V2DI_pointer_node;
19305
19306 tree void_ftype_pv8qi_v8qi_v8qi;
19307 tree void_ftype_pv4hi_v4hi_v4hi;
19308 tree void_ftype_pv2si_v2si_v2si;
19309 tree void_ftype_pv2sf_v2sf_v2sf;
19310 tree void_ftype_pdi_di_di;
19311 tree void_ftype_pv16qi_v16qi_v16qi;
19312 tree void_ftype_pv8hi_v8hi_v8hi;
19313 tree void_ftype_pv4si_v4si_v4si;
19314 tree void_ftype_pv4sf_v4sf_v4sf;
19315 tree void_ftype_pv2di_v2di_v2di;
19316
19317 tree reinterp_ftype_dreg[5][5];
19318 tree reinterp_ftype_qreg[5][5];
19319 tree dreg_types[5], qreg_types[5];
19320
19321 /* Create distinguished type nodes for NEON vector element types,
19322 and pointers to values of such types, so we can detect them later. */
19323 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19324 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19325 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19326 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19327 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
19328 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
19329 neon_float_type_node = make_node (REAL_TYPE);
19330 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
19331 layout_type (neon_float_type_node);
19332
19333 /* Define typedefs which exactly correspond to the modes we are basing vector
19334 types on. If you change these names you'll need to change
19335 the table used by arm_mangle_type too. */
19336 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
19337 "__builtin_neon_qi");
19338 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
19339 "__builtin_neon_hi");
19340 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
19341 "__builtin_neon_si");
19342 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
19343 "__builtin_neon_sf");
19344 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
19345 "__builtin_neon_di");
19346 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
19347 "__builtin_neon_poly8");
19348 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
19349 "__builtin_neon_poly16");
19350
19351 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
19352 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
19353 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
19354 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
19355 float_pointer_node = build_pointer_type (neon_float_type_node);
19356
19357 /* Next create constant-qualified versions of the above types. */
19358 const_intQI_node = build_qualified_type (neon_intQI_type_node,
19359 TYPE_QUAL_CONST);
19360 const_intHI_node = build_qualified_type (neon_intHI_type_node,
19361 TYPE_QUAL_CONST);
19362 const_intSI_node = build_qualified_type (neon_intSI_type_node,
19363 TYPE_QUAL_CONST);
19364 const_intDI_node = build_qualified_type (neon_intDI_type_node,
19365 TYPE_QUAL_CONST);
19366 const_float_node = build_qualified_type (neon_float_type_node,
19367 TYPE_QUAL_CONST);
19368
19369 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
19370 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
19371 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
19372 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
19373 const_float_pointer_node = build_pointer_type (const_float_node);
19374
19375 /* Now create vector types based on our NEON element types. */
19376 /* 64-bit vectors. */
19377 V8QI_type_node =
19378 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
19379 V4HI_type_node =
19380 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
19381 V2SI_type_node =
19382 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
19383 V2SF_type_node =
19384 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
19385 /* 128-bit vectors. */
19386 V16QI_type_node =
19387 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
19388 V8HI_type_node =
19389 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
19390 V4SI_type_node =
19391 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
19392 V4SF_type_node =
19393 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
19394 V2DI_type_node =
19395 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
19396
19397 /* Unsigned integer types for various mode sizes. */
19398 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
19399 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
19400 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
19401 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
19402
19403 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
19404 "__builtin_neon_uqi");
19405 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
19406 "__builtin_neon_uhi");
19407 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
19408 "__builtin_neon_usi");
19409 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
19410 "__builtin_neon_udi");
19411
19412 /* Opaque integer types for structures of vectors. */
19413 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
19414 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
19415 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
19416 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
19417
19418 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
19419 "__builtin_neon_ti");
19420 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
19421 "__builtin_neon_ei");
19422 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
19423 "__builtin_neon_oi");
19424 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
19425 "__builtin_neon_ci");
19426 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
19427 "__builtin_neon_xi");
19428
19429 /* Pointers to vector types. */
19430 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
19431 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
19432 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
19433 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
19434 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
19435 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
19436 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
19437 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
19438 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
19439
19440 /* Operations which return results as pairs. */
19441 void_ftype_pv8qi_v8qi_v8qi =
19442 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
19443 V8QI_type_node, NULL);
19444 void_ftype_pv4hi_v4hi_v4hi =
19445 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
19446 V4HI_type_node, NULL);
19447 void_ftype_pv2si_v2si_v2si =
19448 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
19449 V2SI_type_node, NULL);
19450 void_ftype_pv2sf_v2sf_v2sf =
19451 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
19452 V2SF_type_node, NULL);
19453 void_ftype_pdi_di_di =
19454 build_function_type_list (void_type_node, intDI_pointer_node,
19455 neon_intDI_type_node, neon_intDI_type_node, NULL);
19456 void_ftype_pv16qi_v16qi_v16qi =
19457 build_function_type_list (void_type_node, V16QI_pointer_node,
19458 V16QI_type_node, V16QI_type_node, NULL);
19459 void_ftype_pv8hi_v8hi_v8hi =
19460 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
19461 V8HI_type_node, NULL);
19462 void_ftype_pv4si_v4si_v4si =
19463 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
19464 V4SI_type_node, NULL);
19465 void_ftype_pv4sf_v4sf_v4sf =
19466 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
19467 V4SF_type_node, NULL);
19468 void_ftype_pv2di_v2di_v2di =
19469 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
19470 V2DI_type_node, NULL);
19471
19472 dreg_types[0] = V8QI_type_node;
19473 dreg_types[1] = V4HI_type_node;
19474 dreg_types[2] = V2SI_type_node;
19475 dreg_types[3] = V2SF_type_node;
19476 dreg_types[4] = neon_intDI_type_node;
19477
19478 qreg_types[0] = V16QI_type_node;
19479 qreg_types[1] = V8HI_type_node;
19480 qreg_types[2] = V4SI_type_node;
19481 qreg_types[3] = V4SF_type_node;
19482 qreg_types[4] = V2DI_type_node;
19483
19484 for (i = 0; i < 5; i++)
19485 {
19486 int j;
19487 for (j = 0; j < 5; j++)
19488 {
19489 reinterp_ftype_dreg[i][j]
19490 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
19491 reinterp_ftype_qreg[i][j]
19492 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
19493 }
19494 }
19495
19496 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
19497 i < ARRAY_SIZE (neon_builtin_data);
19498 i++, fcode++)
19499 {
19500 neon_builtin_datum *d = &neon_builtin_data[i];
19501
19502 const char* const modenames[] = {
19503 "v8qi", "v4hi", "v2si", "v2sf", "di",
19504 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
19505 "ti", "ei", "oi"
19506 };
19507 char namebuf[60];
19508 tree ftype = NULL;
19509 int is_load = 0, is_store = 0;
19510
19511 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
19512
19513 d->fcode = fcode;
19514
19515 switch (d->itype)
19516 {
19517 case NEON_LOAD1:
19518 case NEON_LOAD1LANE:
19519 case NEON_LOADSTRUCT:
19520 case NEON_LOADSTRUCTLANE:
19521 is_load = 1;
19522 /* Fall through. */
19523 case NEON_STORE1:
19524 case NEON_STORE1LANE:
19525 case NEON_STORESTRUCT:
19526 case NEON_STORESTRUCTLANE:
19527 if (!is_load)
19528 is_store = 1;
19529 /* Fall through. */
19530 case NEON_UNOP:
19531 case NEON_BINOP:
19532 case NEON_LOGICBINOP:
19533 case NEON_SHIFTINSERT:
19534 case NEON_TERNOP:
19535 case NEON_GETLANE:
19536 case NEON_SETLANE:
19537 case NEON_CREATE:
19538 case NEON_DUP:
19539 case NEON_DUPLANE:
19540 case NEON_SHIFTIMM:
19541 case NEON_SHIFTACC:
19542 case NEON_COMBINE:
19543 case NEON_SPLIT:
19544 case NEON_CONVERT:
19545 case NEON_FIXCONV:
19546 case NEON_LANEMUL:
19547 case NEON_LANEMULL:
19548 case NEON_LANEMULH:
19549 case NEON_LANEMAC:
19550 case NEON_SCALARMUL:
19551 case NEON_SCALARMULL:
19552 case NEON_SCALARMULH:
19553 case NEON_SCALARMAC:
19554 case NEON_SELECT:
19555 case NEON_VTBL:
19556 case NEON_VTBX:
19557 {
19558 int k;
19559 tree return_type = void_type_node, args = void_list_node;
19560
19561 /* Build a function type directly from the insn_data for
19562 this builtin. The build_function_type() function takes
19563 care of removing duplicates for us. */
19564 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
19565 {
19566 tree eltype;
19567
19568 if (is_load && k == 1)
19569 {
19570 /* Neon load patterns always have the memory
19571 operand in the operand 1 position. */
19572 gcc_assert (insn_data[d->code].operand[k].predicate
19573 == neon_struct_operand);
19574
19575 switch (d->mode)
19576 {
19577 case T_V8QI:
19578 case T_V16QI:
19579 eltype = const_intQI_pointer_node;
19580 break;
19581
19582 case T_V4HI:
19583 case T_V8HI:
19584 eltype = const_intHI_pointer_node;
19585 break;
19586
19587 case T_V2SI:
19588 case T_V4SI:
19589 eltype = const_intSI_pointer_node;
19590 break;
19591
19592 case T_V2SF:
19593 case T_V4SF:
19594 eltype = const_float_pointer_node;
19595 break;
19596
19597 case T_DI:
19598 case T_V2DI:
19599 eltype = const_intDI_pointer_node;
19600 break;
19601
19602 default: gcc_unreachable ();
19603 }
19604 }
19605 else if (is_store && k == 0)
19606 {
19607 /* Similarly, Neon store patterns use operand 0 as
19608 the memory location to store to. */
19609 gcc_assert (insn_data[d->code].operand[k].predicate
19610 == neon_struct_operand);
19611
19612 switch (d->mode)
19613 {
19614 case T_V8QI:
19615 case T_V16QI:
19616 eltype = intQI_pointer_node;
19617 break;
19618
19619 case T_V4HI:
19620 case T_V8HI:
19621 eltype = intHI_pointer_node;
19622 break;
19623
19624 case T_V2SI:
19625 case T_V4SI:
19626 eltype = intSI_pointer_node;
19627 break;
19628
19629 case T_V2SF:
19630 case T_V4SF:
19631 eltype = float_pointer_node;
19632 break;
19633
19634 case T_DI:
19635 case T_V2DI:
19636 eltype = intDI_pointer_node;
19637 break;
19638
19639 default: gcc_unreachable ();
19640 }
19641 }
19642 else
19643 {
19644 switch (insn_data[d->code].operand[k].mode)
19645 {
19646 case VOIDmode: eltype = void_type_node; break;
19647 /* Scalars. */
19648 case QImode: eltype = neon_intQI_type_node; break;
19649 case HImode: eltype = neon_intHI_type_node; break;
19650 case SImode: eltype = neon_intSI_type_node; break;
19651 case SFmode: eltype = neon_float_type_node; break;
19652 case DImode: eltype = neon_intDI_type_node; break;
19653 case TImode: eltype = intTI_type_node; break;
19654 case EImode: eltype = intEI_type_node; break;
19655 case OImode: eltype = intOI_type_node; break;
19656 case CImode: eltype = intCI_type_node; break;
19657 case XImode: eltype = intXI_type_node; break;
19658 /* 64-bit vectors. */
19659 case V8QImode: eltype = V8QI_type_node; break;
19660 case V4HImode: eltype = V4HI_type_node; break;
19661 case V2SImode: eltype = V2SI_type_node; break;
19662 case V2SFmode: eltype = V2SF_type_node; break;
19663 /* 128-bit vectors. */
19664 case V16QImode: eltype = V16QI_type_node; break;
19665 case V8HImode: eltype = V8HI_type_node; break;
19666 case V4SImode: eltype = V4SI_type_node; break;
19667 case V4SFmode: eltype = V4SF_type_node; break;
19668 case V2DImode: eltype = V2DI_type_node; break;
19669 default: gcc_unreachable ();
19670 }
19671 }
19672
19673 if (k == 0 && !is_store)
19674 return_type = eltype;
19675 else
19676 args = tree_cons (NULL_TREE, eltype, args);
19677 }
19678
19679 ftype = build_function_type (return_type, args);
19680 }
19681 break;
19682
19683 case NEON_RESULTPAIR:
19684 {
19685 switch (insn_data[d->code].operand[1].mode)
19686 {
19687 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
19688 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
19689 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
19690 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
19691 case DImode: ftype = void_ftype_pdi_di_di; break;
19692 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
19693 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
19694 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
19695 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
19696 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
19697 default: gcc_unreachable ();
19698 }
19699 }
19700 break;
19701
19702 case NEON_REINTERP:
19703 {
19704 /* We iterate over 5 doubleword types, then 5 quadword
19705 types. */
19706 int rhs = d->mode % 5;
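/* E.g. T_V8QI and T_V16QI both give rhs == 0, while T_DI and
   T_V2DI both give rhs == 4 (see neon_builtin_type_mode).  */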
19707 switch (insn_data[d->code].operand[0].mode)
19708 {
19709 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
19710 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
19711 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
19712 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
19713 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
19714 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
19715 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
19716 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
19717 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
19718 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
19719 default: gcc_unreachable ();
19720 }
19721 }
19722 break;
19723
19724 default:
19725 gcc_unreachable ();
19726 }
19727
19728 gcc_assert (ftype != NULL);
19729
19730 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
19731
19732 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
19733 NULL_TREE);
19734 arm_builtin_decls[fcode] = decl;
19735 }
19736 }
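
/* Illustrative sketch of what the loop above registers (not an
   exhaustive list): the builtin name is "__builtin_neon_" followed by
   d->name and the mode suffix, so the first table entry,
   VAR10 (BINOP, vadd, ...), yields

     __builtin_neon_vaddv8qi    (fcode == ARM_BUILTIN_NEON_BASE + 0)
     __builtin_neon_vaddv4hi    (fcode == ARM_BUILTIN_NEON_BASE + 1)
     ...
     __builtin_neon_vaddv2di    (fcode == ARM_BUILTIN_NEON_BASE + 9)

   which the arm_neon.h intrinsics are expected to call.  */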
19737
19738 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
19739 do \
19740 { \
19741 if ((MASK) & insn_flags) \
19742 { \
19743 tree bdecl; \
19744 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
19745 BUILT_IN_MD, NULL, NULL_TREE); \
19746 arm_builtin_decls[CODE] = bdecl; \
19747 } \
19748 } \
19749 while (0)
19750
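/* A hypothetical use of def_mbuiltin (the real calls appear further down,
   in the iWMMXt initialization code):

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcgr0",
                   void_ftype_int, ARM_BUILTIN_SETWCGR0);

   i.e. the builtin is only registered when the FL_* bit passed as MASK
   is set in insn_flags.  */
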
19751 struct builtin_description
19752 {
19753 const unsigned int mask;
19754 const enum insn_code icode;
19755 const char * const name;
19756 const enum arm_builtins code;
19757 const enum rtx_code comparison;
19758 const unsigned int flag;
19759 };
19760
19761 static const struct builtin_description bdesc_2arg[] =
19762 {
19763 #define IWMMXT_BUILTIN(code, string, builtin) \
19764 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
19765 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19766
19767 #define IWMMXT2_BUILTIN(code, string, builtin) \
19768 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
19769 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19770
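/* For illustration only: the first entry below,
     IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
   expands to
     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },
   so each line contributes one builtin_description element.  */
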
19771 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
19772 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
19773 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
19774 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
19775 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
19776 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
19777 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
19778 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
19779 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
19780 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
19781 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
19782 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
19783 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
19784 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
19785 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
19786 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
19787 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
19788 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
19789 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
19790 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
19791 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
19792 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
19793 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
19794 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
19795 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
19796 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
19797 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
19798 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
19799 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
19800 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
19801 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
19802 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
19803 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
19804 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
19805 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
19806 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
19807 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
19808 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
19809 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
19810 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
19811 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
19812 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
19813 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
19814 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
19815 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
19816 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
19817 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
19818 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
19819 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
19820 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
19821 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
19822 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
19823 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
19824 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
19825 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
19826 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
19827 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
19828 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
19829 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
19830 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
19831 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
19832 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
19833 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
19834 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
19835 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
19836 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
19837 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
19838 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
19839 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
19840 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
19841 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
19842 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
19843 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
19844 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
19845 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
19846 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
19847 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
19848 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
19849
19850 #define IWMMXT_BUILTIN2(code, builtin) \
19851 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19852
19853 #define IWMMXT2_BUILTIN2(code, builtin) \
19854 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19855
19856 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
19857 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
19858 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
19859 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
19860 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
19861 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
19862 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
19863 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
19864 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
19865 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
19866 };
19867
19868 static const struct builtin_description bdesc_1arg[] =
19869 {
19870 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
19871 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
19872 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
19873 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
19874 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
19875 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
19876 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
19877 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
19878 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
19879 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
19880 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
19881 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
19882 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
19883 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
19884 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
19885 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
19886 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
19887 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
19888 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
19889 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
19890 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
19891 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
19892 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
19893 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
19894 };
19895
19896 /* Set up all the iWMMXt builtins. This is not called if
19897 TARGET_IWMMXT is zero. */
19898
19899 static void
19900 arm_init_iwmmxt_builtins (void)
19901 {
19902 const struct builtin_description * d;
19903 size_t i;
19904
19905 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
19906 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
19907 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
19908
19909 tree v8qi_ftype_v8qi_v8qi_int
19910 = build_function_type_list (V8QI_type_node,
19911 V8QI_type_node, V8QI_type_node,
19912 integer_type_node, NULL_TREE);
19913 tree v4hi_ftype_v4hi_int
19914 = build_function_type_list (V4HI_type_node,
19915 V4HI_type_node, integer_type_node, NULL_TREE);
19916 tree v2si_ftype_v2si_int
19917 = build_function_type_list (V2SI_type_node,
19918 V2SI_type_node, integer_type_node, NULL_TREE);
19919 tree v2si_ftype_di_di
19920 = build_function_type_list (V2SI_type_node,
19921 long_long_integer_type_node,
19922 long_long_integer_type_node,
19923 NULL_TREE);
19924 tree di_ftype_di_int
19925 = build_function_type_list (long_long_integer_type_node,
19926 long_long_integer_type_node,
19927 integer_type_node, NULL_TREE);
19928 tree di_ftype_di_int_int
19929 = build_function_type_list (long_long_integer_type_node,
19930 long_long_integer_type_node,
19931 integer_type_node,
19932 integer_type_node, NULL_TREE);
19933 tree int_ftype_v8qi
19934 = build_function_type_list (integer_type_node,
19935 V8QI_type_node, NULL_TREE);
19936 tree int_ftype_v4hi
19937 = build_function_type_list (integer_type_node,
19938 V4HI_type_node, NULL_TREE);
19939 tree int_ftype_v2si
19940 = build_function_type_list (integer_type_node,
19941 V2SI_type_node, NULL_TREE);
19942 tree int_ftype_v8qi_int
19943 = build_function_type_list (integer_type_node,
19944 V8QI_type_node, integer_type_node, NULL_TREE);
19945 tree int_ftype_v4hi_int
19946 = build_function_type_list (integer_type_node,
19947 V4HI_type_node, integer_type_node, NULL_TREE);
19948 tree int_ftype_v2si_int
19949 = build_function_type_list (integer_type_node,
19950 V2SI_type_node, integer_type_node, NULL_TREE);
19951 tree v8qi_ftype_v8qi_int_int
19952 = build_function_type_list (V8QI_type_node,
19953 V8QI_type_node, integer_type_node,
19954 integer_type_node, NULL_TREE);
19955 tree v4hi_ftype_v4hi_int_int
19956 = build_function_type_list (V4HI_type_node,
19957 V4HI_type_node, integer_type_node,
19958 integer_type_node, NULL_TREE);
19959 tree v2si_ftype_v2si_int_int
19960 = build_function_type_list (V2SI_type_node,
19961 V2SI_type_node, integer_type_node,
19962 integer_type_node, NULL_TREE);
19963 /* Miscellaneous. */
19964 tree v8qi_ftype_v4hi_v4hi
19965 = build_function_type_list (V8QI_type_node,
19966 V4HI_type_node, V4HI_type_node, NULL_TREE);
19967 tree v4hi_ftype_v2si_v2si
19968 = build_function_type_list (V4HI_type_node,
19969 V2SI_type_node, V2SI_type_node, NULL_TREE);
19970 tree v8qi_ftype_v4hi_v8qi
19971 = build_function_type_list (V8QI_type_node,
19972 V4HI_type_node, V8QI_type_node, NULL_TREE);
19973 tree v2si_ftype_v4hi_v4hi
19974 = build_function_type_list (V2SI_type_node,
19975 V4HI_type_node, V4HI_type_node, NULL_TREE);
19976 tree v2si_ftype_v8qi_v8qi
19977 = build_function_type_list (V2SI_type_node,
19978 V8QI_type_node, V8QI_type_node, NULL_TREE);
19979 tree v4hi_ftype_v4hi_di
19980 = build_function_type_list (V4HI_type_node,
19981 V4HI_type_node, long_long_integer_type_node,
19982 NULL_TREE);
19983 tree v2si_ftype_v2si_di
19984 = build_function_type_list (V2SI_type_node,
19985 V2SI_type_node, long_long_integer_type_node,
19986 NULL_TREE);
19987 tree di_ftype_void
19988 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
19989 tree int_ftype_void
19990 = build_function_type_list (integer_type_node, NULL_TREE);
19991 tree di_ftype_v8qi
19992 = build_function_type_list (long_long_integer_type_node,
19993 V8QI_type_node, NULL_TREE);
19994 tree di_ftype_v4hi
19995 = build_function_type_list (long_long_integer_type_node,
19996 V4HI_type_node, NULL_TREE);
19997 tree di_ftype_v2si
19998 = build_function_type_list (long_long_integer_type_node,
19999 V2SI_type_node, NULL_TREE);
20000 tree v2si_ftype_v4hi
20001 = build_function_type_list (V2SI_type_node,
20002 V4HI_type_node, NULL_TREE);
20003 tree v4hi_ftype_v8qi
20004 = build_function_type_list (V4HI_type_node,
20005 V8QI_type_node, NULL_TREE);
20006 tree v8qi_ftype_v8qi
20007 = build_function_type_list (V8QI_type_node,
20008 V8QI_type_node, NULL_TREE);
20009 tree v4hi_ftype_v4hi
20010 = build_function_type_list (V4HI_type_node,
20011 V4HI_type_node, NULL_TREE);
20012 tree v2si_ftype_v2si
20013 = build_function_type_list (V2SI_type_node,
20014 V2SI_type_node, NULL_TREE);
20015
20016 tree di_ftype_di_v4hi_v4hi
20017 = build_function_type_list (long_long_unsigned_type_node,
20018 long_long_unsigned_type_node,
20019 V4HI_type_node, V4HI_type_node,
20020 NULL_TREE);
20021
20022 tree di_ftype_v4hi_v4hi
20023 = build_function_type_list (long_long_unsigned_type_node,
20024 V4HI_type_node,V4HI_type_node,
20025 NULL_TREE);
20026
20027 tree v2si_ftype_v2si_v4hi_v4hi
20028 = build_function_type_list (V2SI_type_node,
20029 V2SI_type_node, V4HI_type_node,
20030 V4HI_type_node, NULL_TREE);
20031
20032 tree v2si_ftype_v2si_v8qi_v8qi
20033 = build_function_type_list (V2SI_type_node,
20034 V2SI_type_node, V8QI_type_node,
20035 V8QI_type_node, NULL_TREE);
20036
20037 tree di_ftype_di_v2si_v2si
20038 = build_function_type_list (long_long_unsigned_type_node,
20039 long_long_unsigned_type_node,
20040 V2SI_type_node, V2SI_type_node,
20041 NULL_TREE);
20042
20043 tree di_ftype_di_di_int
20044 = build_function_type_list (long_long_unsigned_type_node,
20045 long_long_unsigned_type_node,
20046 long_long_unsigned_type_node,
20047 integer_type_node, NULL_TREE);
20048
20049 tree void_ftype_int
20050 = build_function_type_list (void_type_node,
20051 integer_type_node, NULL_TREE);
20052
20053 tree v8qi_ftype_char
20054 = build_function_type_list (V8QI_type_node,
20055 signed_char_type_node, NULL_TREE);
20056
20057 tree v4hi_ftype_short
20058 = build_function_type_list (V4HI_type_node,
20059 short_integer_type_node, NULL_TREE);
20060
20061 tree v2si_ftype_int
20062 = build_function_type_list (V2SI_type_node,
20063 integer_type_node, NULL_TREE);
20064
20065 /* Normal vector binops. */
20066 tree v8qi_ftype_v8qi_v8qi
20067 = build_function_type_list (V8QI_type_node,
20068 V8QI_type_node, V8QI_type_node, NULL_TREE);
20069 tree v4hi_ftype_v4hi_v4hi
20070 = build_function_type_list (V4HI_type_node,
20071 V4HI_type_node,V4HI_type_node, NULL_TREE);
20072 tree v2si_ftype_v2si_v2si
20073 = build_function_type_list (V2SI_type_node,
20074 V2SI_type_node, V2SI_type_node, NULL_TREE);
20075 tree di_ftype_di_di
20076 = build_function_type_list (long_long_unsigned_type_node,
20077 long_long_unsigned_type_node,
20078 long_long_unsigned_type_node,
20079 NULL_TREE);
20080
20081 /* Add all builtins that are more or less simple operations on two
20082 operands. */
20083 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
20084 {
20085 /* Use one of the operands; the target can have a different mode for
20086 mask-generating compares. */
20087 enum machine_mode mode;
20088 tree type;
20089
20090 if (d->name == 0)
20091 continue;
20092
20093 mode = insn_data[d->icode].operand[1].mode;
20094
20095 switch (mode)
20096 {
20097 case V8QImode:
20098 type = v8qi_ftype_v8qi_v8qi;
20099 break;
20100 case V4HImode:
20101 type = v4hi_ftype_v4hi_v4hi;
20102 break;
20103 case V2SImode:
20104 type = v2si_ftype_v2si_v2si;
20105 break;
20106 case DImode:
20107 type = di_ftype_di_di;
20108 break;
20109
20110 default:
20111 gcc_unreachable ();
20112 }
20113
20114 def_mbuiltin (d->mask, d->name, type, d->code);
20115 }
20116
20117 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
20118 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
20119 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
20120 ARM_BUILTIN_ ## CODE)
20121
20122 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
20123 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
20124 ARM_BUILTIN_ ## CODE)
20125
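 /* Illustrative example (editor's note, not part of the original code):
    the call iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO) below expands to
      def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
                    ARM_BUILTIN_WZERO);
    i.e. it registers a builtin named __builtin_arm_wzero with type
    "unsigned long long (void)" under the FL_IWMMXT feature mask.  The
    user-level _mm_* intrinsics in the iWMMXt header are assumed to be thin
    wrappers around these __builtin_arm_* functions.  */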
20126 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
20127 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
20128 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
20129 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
20130 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
20131 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
20132 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
20133 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
20134 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
20135
20136 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
20137 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
20138 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
20139 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
20140 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
20141 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
20142
20143 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
20144 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
20145 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
20146 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
20147 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
20148 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
20149
20150 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
20151 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
20152 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
20153 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
20154 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
20155 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
20156
20157 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
20158 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
20159 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
20160 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
20161 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
20162 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
20163
20164 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
20165
20166 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
20167 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
20168 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
20169 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
20170 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
20171 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
20172 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
20173 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
20174 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
20175 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
20176
20177 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
20178 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
20179 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
20180 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
20181 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
20182 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
20183 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
20184 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
20185 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
20186
20187 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
20188 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
20189 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
20190
20191 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
20192 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
20193 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
20194
20195 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
20196 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
20197
20198 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
20199 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
20200 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
20201 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
20202 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
20203 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
20204
20205 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
20206 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
20207 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
20208 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
20209 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
20210 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
20211 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
20212 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
20213 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
20214 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
20215 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
20216 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
20217
20218 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
20219 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
20220 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
20221 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
20222
20223 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
20224 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
20225 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
20226 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
20227 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
20228 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
20229 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
20230
20231 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
20232 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
20233 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
20234
20235 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
20236 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
20237 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
20238 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
20239
20240 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
20241 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
20242 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
20243 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
20244
20245 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
20246 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
20247 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
20248 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
20249
20250 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
20251 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
20252 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
20253 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
20254
20255 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
20256 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
20257 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
20258 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
20259
20260 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
20261 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
20262 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
20263 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
20264
20265 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
20266
20267 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
20268 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
20269 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
20270
20271 #undef iwmmx_mbuiltin
20272 #undef iwmmx2_mbuiltin
20273 }
20274
20275 static void
20276 arm_init_fp16_builtins (void)
20277 {
20278 tree fp16_type = make_node (REAL_TYPE);
20279 TYPE_PRECISION (fp16_type) = 16;
20280 layout_type (fp16_type);
20281 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
20282 }
20283
20284 static void
20285 arm_init_builtins (void)
20286 {
20287 if (TARGET_REALLY_IWMMXT)
20288 arm_init_iwmmxt_builtins ();
20289
20290 if (TARGET_NEON)
20291 arm_init_neon_builtins ();
20292
20293 if (arm_fp16_format)
20294 arm_init_fp16_builtins ();
20295 }
20296
20297 /* Return the ARM builtin for CODE. */
20298
20299 static tree
20300 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
20301 {
20302 if (code >= ARM_BUILTIN_MAX)
20303 return error_mark_node;
20304
20305 return arm_builtin_decls[code];
20306 }
20307
20308 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
20309
20310 static const char *
20311 arm_invalid_parameter_type (const_tree t)
20312 {
20313 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20314 return N_("function parameters cannot have __fp16 type");
20315 return NULL;
20316 }
20317
20318 /* Implement TARGET_INVALID_RETURN_TYPE. */
20319
20320 static const char *
20321 arm_invalid_return_type (const_tree t)
20322 {
20323 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20324 return N_("functions cannot return __fp16 type");
20325 return NULL;
20326 }
20327
20328 /* Implement TARGET_PROMOTED_TYPE. */
20329
20330 static tree
20331 arm_promoted_type (const_tree t)
20332 {
20333 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20334 return float_type_node;
20335 return NULL_TREE;
20336 }
20337
20338 /* Implement TARGET_CONVERT_TO_TYPE.
20339 Specifically, this hook implements the peculiarity of the ARM
20340 half-precision floating-point C semantics that requires conversions
20341 between __fp16 and double to go through an intermediate conversion to float. */
20342
20343 static tree
20344 arm_convert_to_type (tree type, tree expr)
20345 {
20346 tree fromtype = TREE_TYPE (expr);
20347 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
20348 return NULL_TREE;
20349 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
20350 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
20351 return convert (type, convert (float_type_node, expr));
20352 return NULL_TREE;
20353 }
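
 /* Worked example (illustrative): given
        __fp16 h;  double d;
    the assignment d = h is lowered by the hook above as
        d = (double) (float) h;
    and h = d as
        h = (__fp16) (float) d;
    since TYPE_PRECISION of __fp16 is 16 and TYPE_PRECISION of double
    exceeds 32, matching the half-precision semantics described above.  */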
20354
20355 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
20356 This simply adds HFmode as a supported mode; even though we don't
20357 implement arithmetic on this type directly, it's supported by
20358 optabs conversions, much the way the double-word arithmetic is
20359 special-cased in the default hook. */
20360
20361 static bool
20362 arm_scalar_mode_supported_p (enum machine_mode mode)
20363 {
20364 if (mode == HFmode)
20365 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
20366 else if (ALL_FIXED_POINT_MODE_P (mode))
20367 return true;
20368 else
20369 return default_scalar_mode_supported_p (mode);
20370 }
20371
20372 /* Errors in the source file can cause expand_expr to return const0_rtx
20373 where we expect a vector. To avoid crashing, use one of the vector
20374 clear instructions. */
20375
20376 static rtx
20377 safe_vector_operand (rtx x, enum machine_mode mode)
20378 {
20379 if (x != const0_rtx)
20380 return x;
20381 x = gen_reg_rtx (mode);
20382
20383 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
20384 : gen_rtx_SUBREG (DImode, x, 0)));
20385 return x;
20386 }
20387
20388 /* Subroutine of arm_expand_builtin to take care of binop insns. */
20389
20390 static rtx
20391 arm_expand_binop_builtin (enum insn_code icode,
20392 tree exp, rtx target)
20393 {
20394 rtx pat;
20395 tree arg0 = CALL_EXPR_ARG (exp, 0);
20396 tree arg1 = CALL_EXPR_ARG (exp, 1);
20397 rtx op0 = expand_normal (arg0);
20398 rtx op1 = expand_normal (arg1);
20399 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20400 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20401 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20402
20403 if (VECTOR_MODE_P (mode0))
20404 op0 = safe_vector_operand (op0, mode0);
20405 if (VECTOR_MODE_P (mode1))
20406 op1 = safe_vector_operand (op1, mode1);
20407
20408 if (! target
20409 || GET_MODE (target) != tmode
20410 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20411 target = gen_reg_rtx (tmode);
20412
20413 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
20414 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
20415
20416 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20417 op0 = copy_to_mode_reg (mode0, op0);
20418 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20419 op1 = copy_to_mode_reg (mode1, op1);
20420
20421 pat = GEN_FCN (icode) (target, op0, op1);
20422 if (! pat)
20423 return 0;
20424 emit_insn (pat);
20425 return target;
20426 }
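
 /* Usage sketch (illustrative): __builtin_arm_wmadds reaches this routine
    through the ARM_BUILTIN_WMADDS case in arm_expand_builtin below, as
        arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
    the two call arguments are expanded, forced into registers of the modes
    the insn pattern expects, and a single iwmmxt_wmadds insn is emitted
    whose result register is returned.  */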
20427
20428 /* Subroutine of arm_expand_builtin to take care of unop insns. */
20429
20430 static rtx
20431 arm_expand_unop_builtin (enum insn_code icode,
20432 tree exp, rtx target, int do_load)
20433 {
20434 rtx pat;
20435 tree arg0 = CALL_EXPR_ARG (exp, 0);
20436 rtx op0 = expand_normal (arg0);
20437 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20438 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20439
20440 if (! target
20441 || GET_MODE (target) != tmode
20442 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20443 target = gen_reg_rtx (tmode);
20444 if (do_load)
20445 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20446 else
20447 {
20448 if (VECTOR_MODE_P (mode0))
20449 op0 = safe_vector_operand (op0, mode0);
20450
20451 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20452 op0 = copy_to_mode_reg (mode0, op0);
20453 }
20454
20455 pat = GEN_FCN (icode) (target, op0);
20456 if (! pat)
20457 return 0;
20458 emit_insn (pat);
20459 return target;
20460 }
20461
20462 typedef enum {
20463 NEON_ARG_COPY_TO_REG,
20464 NEON_ARG_CONSTANT,
20465 NEON_ARG_MEMORY,
20466 NEON_ARG_STOP
20467 } builtin_arg;
20468
20469 #define NEON_MAX_BUILTIN_ARGS 5
20470
20471 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
20472 and return an expression for the accessed memory.
20473
20474 The intrinsic function operates on a block of registers that has
20475 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
20476 function references the memory at EXP of type TYPE and in mode
20477 MEM_MODE; this mode may be BLKmode if no more suitable mode is
20478 available. */
20479
20480 static tree
20481 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
20482 enum machine_mode reg_mode,
20483 neon_builtin_type_mode type_mode)
20484 {
20485 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
20486 tree elem_type, upper_bound, array_type;
20487
20488 /* Work out the size of the register block in bytes. */
20489 reg_size = GET_MODE_SIZE (reg_mode);
20490
20491 /* Work out the size of each vector in bytes. */
20492 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
20493 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
20494
20495 /* Work out how many vectors there are. */
20496 gcc_assert (reg_size % vector_size == 0);
20497 nvectors = reg_size / vector_size;
20498
20499 /* Work out the type of each element. */
20500 gcc_assert (POINTER_TYPE_P (type));
20501 elem_type = TREE_TYPE (type);
20502
20503 /* Work out how many elements are being loaded or stored.
20504 MEM_MODE == REG_MODE implies a one-to-one mapping between register
20505 and memory elements; anything else implies a lane load or store. */
20506 if (mem_mode == reg_mode)
20507 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
20508 else
20509 nelems = nvectors;
20510
20511 /* Create a type that describes the full access. */
20512 upper_bound = build_int_cst (size_type_node, nelems - 1);
20513 array_type = build_array_type (elem_type, build_index_type (upper_bound));
20514
20515 /* Dereference EXP using that type. */
20516 return fold_build2 (MEM_REF, array_type, exp,
20517 build_int_cst (build_pointer_type (array_type), 0));
20518 }
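
 /* Worked example (illustrative): for a plain 64-bit load in the style of
    vld1_s32, REG_MODE and MEM_MODE are both V2SImode, so reg_size = 8,
    vector_size = 8 and nvectors = 1; with a pointer-to-int argument,
    nelems = 8 * 1 / 4 = 2, and the access is described as an int[2]
    array dereference of EXP.  */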
20519
20520 /* Expand a Neon builtin. */
20521 static rtx
20522 arm_expand_neon_args (rtx target, int icode, int have_retval,
20523 neon_builtin_type_mode type_mode,
20524 tree exp, int fcode, ...)
20525 {
20526 va_list ap;
20527 rtx pat;
20528 tree arg[NEON_MAX_BUILTIN_ARGS];
20529 rtx op[NEON_MAX_BUILTIN_ARGS];
20530 tree arg_type;
20531 tree formals;
20532 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20533 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
20534 enum machine_mode other_mode;
20535 int argc = 0;
20536 int opno;
20537
20538 if (have_retval
20539 && (!target
20540 || GET_MODE (target) != tmode
20541 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
20542 target = gen_reg_rtx (tmode);
20543
20544 va_start (ap, fcode);
20545
20546 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
20547
20548 for (;;)
20549 {
20550 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
20551
20552 if (thisarg == NEON_ARG_STOP)
20553 break;
20554 else
20555 {
20556 opno = argc + have_retval;
20557 mode[argc] = insn_data[icode].operand[opno].mode;
20558 arg[argc] = CALL_EXPR_ARG (exp, argc);
20559 arg_type = TREE_VALUE (formals);
20560 if (thisarg == NEON_ARG_MEMORY)
20561 {
20562 other_mode = insn_data[icode].operand[1 - opno].mode;
20563 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
20564 mode[argc], other_mode,
20565 type_mode);
20566 }
20567
20568 op[argc] = expand_normal (arg[argc]);
20569
20570 switch (thisarg)
20571 {
20572 case NEON_ARG_COPY_TO_REG:
20573 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
20574 if (!(*insn_data[icode].operand[opno].predicate)
20575 (op[argc], mode[argc]))
20576 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
20577 break;
20578
20579 case NEON_ARG_CONSTANT:
20580 /* FIXME: This error message is somewhat unhelpful. */
20581 if (!(*insn_data[icode].operand[opno].predicate)
20582 (op[argc], mode[argc]))
20583 error ("argument must be a constant");
20584 break;
20585
20586 case NEON_ARG_MEMORY:
20587 gcc_assert (MEM_P (op[argc]));
20588 PUT_MODE (op[argc], mode[argc]);
20589 /* ??? arm_neon.h uses the same built-in functions for signed
20590 and unsigned accesses, casting where necessary. This isn't
20591 alias safe. */
20592 set_mem_alias_set (op[argc], 0);
20593 if (!(*insn_data[icode].operand[opno].predicate)
20594 (op[argc], mode[argc]))
20595 op[argc] = (replace_equiv_address
20596 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
20597 break;
20598
20599 case NEON_ARG_STOP:
20600 gcc_unreachable ();
20601 }
20602
20603 argc++;
20604 formals = TREE_CHAIN (formals);
20605 }
20606 }
20607
20608 va_end (ap);
20609
20610 if (have_retval)
20611 switch (argc)
20612 {
20613 case 1:
20614 pat = GEN_FCN (icode) (target, op[0]);
20615 break;
20616
20617 case 2:
20618 pat = GEN_FCN (icode) (target, op[0], op[1]);
20619 break;
20620
20621 case 3:
20622 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
20623 break;
20624
20625 case 4:
20626 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
20627 break;
20628
20629 case 5:
20630 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
20631 break;
20632
20633 default:
20634 gcc_unreachable ();
20635 }
20636 else
20637 switch (argc)
20638 {
20639 case 1:
20640 pat = GEN_FCN (icode) (op[0]);
20641 break;
20642
20643 case 2:
20644 pat = GEN_FCN (icode) (op[0], op[1]);
20645 break;
20646
20647 case 3:
20648 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
20649 break;
20650
20651 case 4:
20652 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
20653 break;
20654
20655 case 5:
20656 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
20657 break;
20658
20659 default:
20660 gcc_unreachable ();
20661 }
20662
20663 if (!pat)
20664 return 0;
20665
20666 emit_insn (pat);
20667
20668 return target;
20669 }
20670
20671 /* Expand a Neon builtin. These are "special" because they don't have symbolic
20672 constants defined per-instruction or per instruction-variant. Instead, the
20673 required info is looked up in the table neon_builtin_data. */
20674 static rtx
20675 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
20676 {
20677 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
20678 neon_itype itype = d->itype;
20679 enum insn_code icode = d->code;
20680 neon_builtin_type_mode type_mode = d->mode;
20681
20682 switch (itype)
20683 {
20684 case NEON_UNOP:
20685 case NEON_CONVERT:
20686 case NEON_DUPLANE:
20687 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20688 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20689
20690 case NEON_BINOP:
20691 case NEON_SETLANE:
20692 case NEON_SCALARMUL:
20693 case NEON_SCALARMULL:
20694 case NEON_SCALARMULH:
20695 case NEON_SHIFTINSERT:
20696 case NEON_LOGICBINOP:
20697 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20698 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20699 NEON_ARG_STOP);
20700
20701 case NEON_TERNOP:
20702 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20703 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20704 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20705
20706 case NEON_GETLANE:
20707 case NEON_FIXCONV:
20708 case NEON_SHIFTIMM:
20709 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20710 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
20711 NEON_ARG_STOP);
20712
20713 case NEON_CREATE:
20714 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20715 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20716
20717 case NEON_DUP:
20718 case NEON_SPLIT:
20719 case NEON_REINTERP:
20720 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20721 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20722
20723 case NEON_COMBINE:
20724 case NEON_VTBL:
20725 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20726 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20727
20728 case NEON_RESULTPAIR:
20729 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
20730 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20731 NEON_ARG_STOP);
20732
20733 case NEON_LANEMUL:
20734 case NEON_LANEMULL:
20735 case NEON_LANEMULH:
20736 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20737 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20738 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20739
20740 case NEON_LANEMAC:
20741 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20742 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20743 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20744
20745 case NEON_SHIFTACC:
20746 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20747 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20748 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20749
20750 case NEON_SCALARMAC:
20751 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20752 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20753 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20754
20755 case NEON_SELECT:
20756 case NEON_VTBX:
20757 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20758 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20759 NEON_ARG_STOP);
20760
20761 case NEON_LOAD1:
20762 case NEON_LOADSTRUCT:
20763 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20764 NEON_ARG_MEMORY, NEON_ARG_STOP);
20765
20766 case NEON_LOAD1LANE:
20767 case NEON_LOADSTRUCTLANE:
20768 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20769 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20770 NEON_ARG_STOP);
20771
20772 case NEON_STORE1:
20773 case NEON_STORESTRUCT:
20774 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
20775 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20776
20777 case NEON_STORE1LANE:
20778 case NEON_STORESTRUCTLANE:
20779 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
20780 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20781 NEON_ARG_STOP);
20782 }
20783
20784 gcc_unreachable ();
20785 }
20786
20787 /* Emit code to reinterpret one Neon type as another, without altering bits. */
20788 void
20789 neon_reinterpret (rtx dest, rtx src)
20790 {
20791 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
20792 }
20793
20794 /* Emit code to place a Neon pair result in memory locations (with equal
20795 registers). */
20796 void
20797 neon_emit_pair_result_insn (enum machine_mode mode,
20798 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
20799 rtx op1, rtx op2)
20800 {
20801 rtx mem = gen_rtx_MEM (mode, destaddr);
20802 rtx tmp1 = gen_reg_rtx (mode);
20803 rtx tmp2 = gen_reg_rtx (mode);
20804
20805 emit_insn (intfn (tmp1, op1, op2, tmp2));
20806
20807 emit_move_insn (mem, tmp1);
20808 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
20809 emit_move_insn (mem, tmp2);
20810 }
20811
20812 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
20813 not to early-clobber SRC registers in the process.
20814
20815 We assume that the operands described by SRC and DEST represent a
20816 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
20817 number of components into which the copy has been decomposed. */
20818 void
20819 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
20820 {
20821 unsigned int i;
20822
20823 if (!reg_overlap_mentioned_p (operands[0], operands[1])
20824 || REGNO (operands[0]) < REGNO (operands[1]))
20825 {
20826 for (i = 0; i < count; i++)
20827 {
20828 operands[2 * i] = dest[i];
20829 operands[2 * i + 1] = src[i];
20830 }
20831 }
20832 else
20833 {
20834 for (i = 0; i < count; i++)
20835 {
20836 operands[2 * i] = dest[count - i - 1];
20837 operands[2 * i + 1] = src[count - i - 1];
20838 }
20839 }
20840 }
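
 /* Example (illustrative): copying the register pair {d3, d4} into {d4, d5}
    overlaps and the destination starts at a higher register number, so the
    component moves are ordered in reverse (d5 <- d4 first, then d4 <- d3);
    d4 is therefore read before it is overwritten.  */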
20841
20842 /* Split operands into moves from op[1] + op[2] into op[0]. */
20843
20844 void
20845 neon_split_vcombine (rtx operands[3])
20846 {
20847 unsigned int dest = REGNO (operands[0]);
20848 unsigned int src1 = REGNO (operands[1]);
20849 unsigned int src2 = REGNO (operands[2]);
20850 enum machine_mode halfmode = GET_MODE (operands[1]);
20851 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
20852 rtx destlo, desthi;
20853
20854 if (src1 == dest && src2 == dest + halfregs)
20855 {
20856 /* No-op move. Can't split to nothing; emit something. */
20857 emit_note (NOTE_INSN_DELETED);
20858 return;
20859 }
20860
20861 /* Preserve register attributes for variable tracking. */
20862 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
20863 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
20864 GET_MODE_SIZE (halfmode));
20865
20866 /* Special case of reversed high/low parts. Use VSWP. */
20867 if (src2 == dest && src1 == dest + halfregs)
20868 {
20869 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
20870 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
20871 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
20872 return;
20873 }
20874
20875 if (!reg_overlap_mentioned_p (operands[2], destlo))
20876 {
20877 /* Try to avoid unnecessary moves if part of the result
20878 is in the right place already. */
20879 if (src1 != dest)
20880 emit_move_insn (destlo, operands[1]);
20881 if (src2 != dest + halfregs)
20882 emit_move_insn (desthi, operands[2]);
20883 }
20884 else
20885 {
20886 if (src2 != dest + halfregs)
20887 emit_move_insn (desthi, operands[2]);
20888 if (src1 != dest)
20889 emit_move_insn (destlo, operands[1]);
20890 }
20891 }
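
 /* Example (illustrative): for a vcombine whose destination is q1 (d2/d3)
    with source halves d3 and d5, operands[2] (d5) does not overlap destlo
    (d2), so the low move d2 <- d3 is emitted first and the high move
    d3 <- d5 second; d3 is read before it is overwritten.  */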
20892
20893 /* Expand an expression EXP that calls a built-in function,
20894 with result going to TARGET if that's convenient
20895 (and in mode MODE if that's convenient).
20896 SUBTARGET may be used as the target for computing one of EXP's operands.
20897 IGNORE is nonzero if the value is to be ignored. */
20898
20899 static rtx
20900 arm_expand_builtin (tree exp,
20901 rtx target,
20902 rtx subtarget ATTRIBUTE_UNUSED,
20903 enum machine_mode mode ATTRIBUTE_UNUSED,
20904 int ignore ATTRIBUTE_UNUSED)
20905 {
20906 const struct builtin_description * d;
20907 enum insn_code icode;
20908 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20909 tree arg0;
20910 tree arg1;
20911 tree arg2;
20912 rtx op0;
20913 rtx op1;
20914 rtx op2;
20915 rtx pat;
20916 int fcode = DECL_FUNCTION_CODE (fndecl);
20917 size_t i;
20918 enum machine_mode tmode;
20919 enum machine_mode mode0;
20920 enum machine_mode mode1;
20921 enum machine_mode mode2;
20922 int opint;
20923 int selector;
20924 int mask;
20925 int imm;
20926
20927 if (fcode >= ARM_BUILTIN_NEON_BASE)
20928 return arm_expand_neon_builtin (fcode, exp, target);
20929
20930 switch (fcode)
20931 {
20932 case ARM_BUILTIN_TEXTRMSB:
20933 case ARM_BUILTIN_TEXTRMUB:
20934 case ARM_BUILTIN_TEXTRMSH:
20935 case ARM_BUILTIN_TEXTRMUH:
20936 case ARM_BUILTIN_TEXTRMSW:
20937 case ARM_BUILTIN_TEXTRMUW:
20938 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
20939 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
20940 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
20941 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
20942 : CODE_FOR_iwmmxt_textrmw);
20943
20944 arg0 = CALL_EXPR_ARG (exp, 0);
20945 arg1 = CALL_EXPR_ARG (exp, 1);
20946 op0 = expand_normal (arg0);
20947 op1 = expand_normal (arg1);
20948 tmode = insn_data[icode].operand[0].mode;
20949 mode0 = insn_data[icode].operand[1].mode;
20950 mode1 = insn_data[icode].operand[2].mode;
20951
20952 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20953 op0 = copy_to_mode_reg (mode0, op0);
20954 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20955 {
20956 /* @@@ better error message */
20957 error ("selector must be an immediate");
20958 return gen_reg_rtx (tmode);
20959 }
20960
20961 opint = INTVAL (op1);
20962 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
20963 {
20964 if (opint > 7 || opint < 0)
20965 error ("the range of selector should be in 0 to 7");
20966 }
20967 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
20968 {
20969 if (opint > 3 || opint < 0)
20970 error ("the range of selector should be in 0 to 3");
20971 }
20972 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
20973 {
20974 if (opint > 1 || opint < 0)
20975 error ("the range of selector should be in 0 to 1");
20976 }
20977
20978 if (target == 0
20979 || GET_MODE (target) != tmode
20980 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20981 target = gen_reg_rtx (tmode);
20982 pat = GEN_FCN (icode) (target, op0, op1);
20983 if (! pat)
20984 return 0;
20985 emit_insn (pat);
20986 return target;
20987
20988 case ARM_BUILTIN_WALIGNI:
20989 /* If op2 is an immediate, use waligni; otherwise use walignr. */
20990 arg0 = CALL_EXPR_ARG (exp, 0);
20991 arg1 = CALL_EXPR_ARG (exp, 1);
20992 arg2 = CALL_EXPR_ARG (exp, 2);
20993 op0 = expand_normal (arg0);
20994 op1 = expand_normal (arg1);
20995 op2 = expand_normal (arg2);
20996 if (CONST_INT_P (op2))
20997 {
20998 icode = CODE_FOR_iwmmxt_waligni;
20999 tmode = insn_data[icode].operand[0].mode;
21000 mode0 = insn_data[icode].operand[1].mode;
21001 mode1 = insn_data[icode].operand[2].mode;
21002 mode2 = insn_data[icode].operand[3].mode;
21003 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
21004 op0 = copy_to_mode_reg (mode0, op0);
21005 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
21006 op1 = copy_to_mode_reg (mode1, op1);
21007 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
21008 selector = INTVAL (op2);
21009 if (selector > 7 || selector < 0)
21010 error ("the range of selector should be in 0 to 7");
21011 }
21012 else
21013 {
21014 icode = CODE_FOR_iwmmxt_walignr;
21015 tmode = insn_data[icode].operand[0].mode;
21016 mode0 = insn_data[icode].operand[1].mode;
21017 mode1 = insn_data[icode].operand[2].mode;
21018 mode2 = insn_data[icode].operand[3].mode;
21019 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
21020 op0 = copy_to_mode_reg (mode0, op0);
21021 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
21022 op1 = copy_to_mode_reg (mode1, op1);
21023 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
21024 op2 = copy_to_mode_reg (mode2, op2);
21025 }
21026 if (target == 0
21027 || GET_MODE (target) != tmode
21028 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
21029 target = gen_reg_rtx (tmode);
21030 pat = GEN_FCN (icode) (target, op0, op1, op2);
21031 if (!pat)
21032 return 0;
21033 emit_insn (pat);
21034 return target;
21035
21036 case ARM_BUILTIN_TINSRB:
21037 case ARM_BUILTIN_TINSRH:
21038 case ARM_BUILTIN_TINSRW:
21039 case ARM_BUILTIN_WMERGE:
21040 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
21041 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
21042 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
21043 : CODE_FOR_iwmmxt_tinsrw);
21044 arg0 = CALL_EXPR_ARG (exp, 0);
21045 arg1 = CALL_EXPR_ARG (exp, 1);
21046 arg2 = CALL_EXPR_ARG (exp, 2);
21047 op0 = expand_normal (arg0);
21048 op1 = expand_normal (arg1);
21049 op2 = expand_normal (arg2);
21050 tmode = insn_data[icode].operand[0].mode;
21051 mode0 = insn_data[icode].operand[1].mode;
21052 mode1 = insn_data[icode].operand[2].mode;
21053 mode2 = insn_data[icode].operand[3].mode;
21054
21055 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21056 op0 = copy_to_mode_reg (mode0, op0);
21057 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21058 op1 = copy_to_mode_reg (mode1, op1);
21059 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21060 {
21061 error ("selector must be an immediate");
21062 return const0_rtx;
21063 }
21064 if (icode == CODE_FOR_iwmmxt_wmerge)
21065 {
21066 selector = INTVAL (op2);
21067 if (selector > 7 || selector < 0)
21068 error ("the range of selector should be in 0 to 7");
21069 }
21070 if ((icode == CODE_FOR_iwmmxt_tinsrb)
21071 || (icode == CODE_FOR_iwmmxt_tinsrh)
21072 || (icode == CODE_FOR_iwmmxt_tinsrw))
21073 {
21074 mask = 0x01;
21075 selector= INTVAL (op2);
21076 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
21077 error ("the range of selector should be in 0 to 7");
21078 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 ||selector > 3))
21079 error ("the range of selector should be in 0 to 3");
21080 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 ||selector > 1))
21081 error ("the range of selector should be in 0 to 1");
21082 mask <<= selector;
21083 op2 = GEN_INT (mask);
21084 }
21085 if (target == 0
21086 || GET_MODE (target) != tmode
21087 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21088 target = gen_reg_rtx (tmode);
21089 pat = GEN_FCN (icode) (target, op0, op1, op2);
21090 if (! pat)
21091 return 0;
21092 emit_insn (pat);
21093 return target;
21094
21095 case ARM_BUILTIN_SETWCGR0:
21096 case ARM_BUILTIN_SETWCGR1:
21097 case ARM_BUILTIN_SETWCGR2:
21098 case ARM_BUILTIN_SETWCGR3:
21099 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
21100 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
21101 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
21102 : CODE_FOR_iwmmxt_setwcgr3);
21103 arg0 = CALL_EXPR_ARG (exp, 0);
21104 op0 = expand_normal (arg0);
21105 mode0 = insn_data[icode].operand[0].mode;
21106 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
21107 op0 = copy_to_mode_reg (mode0, op0);
21108 pat = GEN_FCN (icode) (op0);
21109 if (!pat)
21110 return 0;
21111 emit_insn (pat);
21112 return 0;
21113
21114 case ARM_BUILTIN_GETWCGR0:
21115 case ARM_BUILTIN_GETWCGR1:
21116 case ARM_BUILTIN_GETWCGR2:
21117 case ARM_BUILTIN_GETWCGR3:
21118 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
21119 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
21120 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
21121 : CODE_FOR_iwmmxt_getwcgr3);
21122 tmode = insn_data[icode].operand[0].mode;
21123 if (target == 0
21124 || GET_MODE (target) != tmode
21125 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
21126 target = gen_reg_rtx (tmode);
21127 pat = GEN_FCN (icode) (target);
21128 if (!pat)
21129 return 0;
21130 emit_insn (pat);
21131 return target;
21132
21133 case ARM_BUILTIN_WSHUFH:
21134 icode = CODE_FOR_iwmmxt_wshufh;
21135 arg0 = CALL_EXPR_ARG (exp, 0);
21136 arg1 = CALL_EXPR_ARG (exp, 1);
21137 op0 = expand_normal (arg0);
21138 op1 = expand_normal (arg1);
21139 tmode = insn_data[icode].operand[0].mode;
21140 mode1 = insn_data[icode].operand[1].mode;
21141 mode2 = insn_data[icode].operand[2].mode;
21142
21143 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21144 op0 = copy_to_mode_reg (mode1, op0);
21145 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21146 {
21147 error ("mask must be an immediate");
21148 return const0_rtx;
21149 }
21150 selector = INTVAL (op1);
21151 if (selector < 0 || selector > 255)
21152 error ("the range of mask should be in 0 to 255");
21153 if (target == 0
21154 || GET_MODE (target) != tmode
21155 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21156 target = gen_reg_rtx (tmode);
21157 pat = GEN_FCN (icode) (target, op0, op1);
21158 if (! pat)
21159 return 0;
21160 emit_insn (pat);
21161 return target;
21162
21163 case ARM_BUILTIN_WMADDS:
21164 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
21165 case ARM_BUILTIN_WMADDSX:
21166 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
21167 case ARM_BUILTIN_WMADDSN:
21168 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
21169 case ARM_BUILTIN_WMADDU:
21170 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
21171 case ARM_BUILTIN_WMADDUX:
21172 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
21173 case ARM_BUILTIN_WMADDUN:
21174 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
21175 case ARM_BUILTIN_WSADBZ:
21176 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
21177 case ARM_BUILTIN_WSADHZ:
21178 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
21179
21180 /* Several three-argument builtins. */
21181 case ARM_BUILTIN_WMACS:
21182 case ARM_BUILTIN_WMACU:
21183 case ARM_BUILTIN_TMIA:
21184 case ARM_BUILTIN_TMIAPH:
21185 case ARM_BUILTIN_TMIATT:
21186 case ARM_BUILTIN_TMIATB:
21187 case ARM_BUILTIN_TMIABT:
21188 case ARM_BUILTIN_TMIABB:
21189 case ARM_BUILTIN_WQMIABB:
21190 case ARM_BUILTIN_WQMIABT:
21191 case ARM_BUILTIN_WQMIATB:
21192 case ARM_BUILTIN_WQMIATT:
21193 case ARM_BUILTIN_WQMIABBN:
21194 case ARM_BUILTIN_WQMIABTN:
21195 case ARM_BUILTIN_WQMIATBN:
21196 case ARM_BUILTIN_WQMIATTN:
21197 case ARM_BUILTIN_WMIABB:
21198 case ARM_BUILTIN_WMIABT:
21199 case ARM_BUILTIN_WMIATB:
21200 case ARM_BUILTIN_WMIATT:
21201 case ARM_BUILTIN_WMIABBN:
21202 case ARM_BUILTIN_WMIABTN:
21203 case ARM_BUILTIN_WMIATBN:
21204 case ARM_BUILTIN_WMIATTN:
21205 case ARM_BUILTIN_WMIAWBB:
21206 case ARM_BUILTIN_WMIAWBT:
21207 case ARM_BUILTIN_WMIAWTB:
21208 case ARM_BUILTIN_WMIAWTT:
21209 case ARM_BUILTIN_WMIAWBBN:
21210 case ARM_BUILTIN_WMIAWBTN:
21211 case ARM_BUILTIN_WMIAWTBN:
21212 case ARM_BUILTIN_WMIAWTTN:
21213 case ARM_BUILTIN_WSADB:
21214 case ARM_BUILTIN_WSADH:
21215 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
21216 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
21217 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
21218 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
21219 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
21220 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
21221 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
21222 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
21223 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
21224 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
21225 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
21226 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
21227 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
21228 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
21229 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
21230 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
21231 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
21232 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
21233 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
21234 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
21235 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
21236 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
21237 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
21238 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
21239 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
21240 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
21241 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
21242 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
21243 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
21244 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
21245 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
21246 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
21247 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
21248 : CODE_FOR_iwmmxt_wsadh);
21249 arg0 = CALL_EXPR_ARG (exp, 0);
21250 arg1 = CALL_EXPR_ARG (exp, 1);
21251 arg2 = CALL_EXPR_ARG (exp, 2);
21252 op0 = expand_normal (arg0);
21253 op1 = expand_normal (arg1);
21254 op2 = expand_normal (arg2);
21255 tmode = insn_data[icode].operand[0].mode;
21256 mode0 = insn_data[icode].operand[1].mode;
21257 mode1 = insn_data[icode].operand[2].mode;
21258 mode2 = insn_data[icode].operand[3].mode;
21259
21260 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21261 op0 = copy_to_mode_reg (mode0, op0);
21262 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21263 op1 = copy_to_mode_reg (mode1, op1);
21264 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21265 op2 = copy_to_mode_reg (mode2, op2);
21266 if (target == 0
21267 || GET_MODE (target) != tmode
21268 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21269 target = gen_reg_rtx (tmode);
21270 pat = GEN_FCN (icode) (target, op0, op1, op2);
21271 if (! pat)
21272 return 0;
21273 emit_insn (pat);
21274 return target;
21275
21276 case ARM_BUILTIN_WZERO:
21277 target = gen_reg_rtx (DImode);
21278 emit_insn (gen_iwmmxt_clrdi (target));
21279 return target;
21280
21281 case ARM_BUILTIN_WSRLHI:
21282 case ARM_BUILTIN_WSRLWI:
21283 case ARM_BUILTIN_WSRLDI:
21284 case ARM_BUILTIN_WSLLHI:
21285 case ARM_BUILTIN_WSLLWI:
21286 case ARM_BUILTIN_WSLLDI:
21287 case ARM_BUILTIN_WSRAHI:
21288 case ARM_BUILTIN_WSRAWI:
21289 case ARM_BUILTIN_WSRADI:
21290 case ARM_BUILTIN_WRORHI:
21291 case ARM_BUILTIN_WRORWI:
21292 case ARM_BUILTIN_WRORDI:
21293 case ARM_BUILTIN_WSRLH:
21294 case ARM_BUILTIN_WSRLW:
21295 case ARM_BUILTIN_WSRLD:
21296 case ARM_BUILTIN_WSLLH:
21297 case ARM_BUILTIN_WSLLW:
21298 case ARM_BUILTIN_WSLLD:
21299 case ARM_BUILTIN_WSRAH:
21300 case ARM_BUILTIN_WSRAW:
21301 case ARM_BUILTIN_WSRAD:
21302 case ARM_BUILTIN_WRORH:
21303 case ARM_BUILTIN_WRORW:
21304 case ARM_BUILTIN_WRORD:
21305 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
21306 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
21307 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
21308 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
21309 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
21310 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
21311 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
21312 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
21313 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
21314 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
21315 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
21316 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
21317 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
21318 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
21319 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
21320 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
21321 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
21322 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
21323 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
21324 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
21325 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
21326 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
21327 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
21328 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
21329 : CODE_FOR_nothing);
21330 arg1 = CALL_EXPR_ARG (exp, 1);
21331 op1 = expand_normal (arg1);
21332 if (GET_MODE (op1) == VOIDmode)
21333 {
21334 imm = INTVAL (op1);
21335 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
21336 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
21337 && (imm < 0 || imm > 32))
21338 {
21339 if (fcode == ARM_BUILTIN_WRORHI)
21340 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
21341 else if (fcode == ARM_BUILTIN_WRORWI)
21342 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
21343 else if (fcode == ARM_BUILTIN_WRORH)
21344 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
21345 else
21346 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
21347 }
21348 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
21349 && (imm < 0 || imm > 64))
21350 {
21351 if (fcode == ARM_BUILTIN_WRORDI)
21352 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
21353 else
21354 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
21355 }
21356 else if (imm < 0)
21357 {
21358 if (fcode == ARM_BUILTIN_WSRLHI)
21359 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
21360 else if (fcode == ARM_BUILTIN_WSRLWI)
21361 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
21362 else if (fcode == ARM_BUILTIN_WSRLDI)
21363 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
21364 else if (fcode == ARM_BUILTIN_WSLLHI)
21365 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
21366 else if (fcode == ARM_BUILTIN_WSLLWI)
21367 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
21368 else if (fcode == ARM_BUILTIN_WSLLDI)
21369 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
21370 else if (fcode == ARM_BUILTIN_WSRAHI)
21371 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
21372 else if (fcode == ARM_BUILTIN_WSRAWI)
21373 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
21374 else if (fcode == ARM_BUILTIN_WSRADI)
21375 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
21376 else if (fcode == ARM_BUILTIN_WSRLH)
21377 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
21378 else if (fcode == ARM_BUILTIN_WSRLW)
21379 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
21380 else if (fcode == ARM_BUILTIN_WSRLD)
21381 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
21382 else if (fcode == ARM_BUILTIN_WSLLH)
21383 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
21384 else if (fcode == ARM_BUILTIN_WSLLW)
21385 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
21386 else if (fcode == ARM_BUILTIN_WSLLD)
21387 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
21388 else if (fcode == ARM_BUILTIN_WSRAH)
21389 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
21390 else if (fcode == ARM_BUILTIN_WSRAW)
21391 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
21392 else
21393 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
21394 }
21395 }
21396 return arm_expand_binop_builtin (icode, exp, target);
21397
21398 default:
21399 break;
21400 }
21401
21402 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21403 if (d->code == (const enum arm_builtins) fcode)
21404 return arm_expand_binop_builtin (d->icode, exp, target);
21405
21406 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21407 if (d->code == (const enum arm_builtins) fcode)
21408 return arm_expand_unop_builtin (d->icode, exp, target, 0);
21409
21410 /* @@@ Should really do something sensible here. */
21411 return NULL_RTX;
21412 }
21413 \f
21414 /* Return the number (counting from 0) of
21415 the least significant set bit in MASK. */
21416
21417 inline static int
21418 number_of_first_bit_set (unsigned mask)
21419 {
21420 return ctz_hwi (mask);
21421 }
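
 /* For example, a MASK of 0x28 (binary 101000) yields 3, since bit 3 is
    the least significant bit that is set.  */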
21422
21423 /* Like emit_multi_reg_push, but allowing for a different set of
21424 registers to be described as saved. MASK is the set of registers
21425 to be saved; REAL_REGS is the set of registers to be described as
21426 saved. If REAL_REGS is 0, only describe the stack adjustment. */
21427
21428 static rtx
21429 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
21430 {
21431 unsigned long regno;
21432 rtx par[10], tmp, reg, insn;
21433 int i, j;
21434
21435 /* Build the parallel of the registers actually being stored. */
21436 for (i = 0; mask; ++i, mask &= mask - 1)
21437 {
21438 regno = ctz_hwi (mask);
21439 reg = gen_rtx_REG (SImode, regno);
21440
21441 if (i == 0)
21442 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
21443 else
21444 tmp = gen_rtx_USE (VOIDmode, reg);
21445
21446 par[i] = tmp;
21447 }
21448
21449 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
21450 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21451 tmp = gen_frame_mem (BLKmode, tmp);
21452 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
21453 par[0] = tmp;
21454
21455 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
21456 insn = emit_insn (tmp);
21457
21458 /* Always build the stack adjustment note for unwind info. */
21459 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
21460 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
21461 par[0] = tmp;
21462
21463 /* Build the parallel of the registers recorded as saved for unwind. */
21464 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
21465 {
21466 regno = ctz_hwi (real_regs);
21467 reg = gen_rtx_REG (SImode, regno);
21468
21469 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
21470 tmp = gen_frame_mem (SImode, tmp);
21471 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
21472 RTX_FRAME_RELATED_P (tmp) = 1;
21473 par[j + 1] = tmp;
21474 }
21475
21476 if (j == 0)
21477 tmp = par[0];
21478 else
21479 {
21480 RTX_FRAME_RELATED_P (par[0]) = 1;
21481 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
21482 }
21483
21484 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
21485
21486 return insn;
21487 }
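
 /* Usage sketch (illustrative): a caller that pushes r2 and r3 purely to
    allocate 8 bytes of stack, without wanting them recorded as saved for
    unwinding, could pass
        mask = (1 << 2) | (1 << 3), real_regs = 0
    so the emitted push stores both registers while the attached
    REG_FRAME_RELATED_EXPR note describes only the stack adjustment.  */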
21488
21489 /* Emit code to pop registers from the stack.  F is the
21490 assembly file.  MASK is the registers to pop. */
21491 static void
21492 thumb_pop (FILE *f, unsigned long mask)
21493 {
21494 int regno;
21495 int lo_mask = mask & 0xFF;
21496 int pushed_words = 0;
21497
21498 gcc_assert (mask);
21499
21500 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
21501 {
21502 /* Special case: do not generate a POP PC statement here; do it in
21503 thumb_exit () instead. */
21504 thumb_exit (f, -1);
21505 return;
21506 }
21507
21508 fprintf (f, "\tpop\t{");
21509
21510 /* Look at the low registers first. */
21511 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
21512 {
21513 if (lo_mask & 1)
21514 {
21515 asm_fprintf (f, "%r", regno);
21516
21517 if ((lo_mask & ~1) != 0)
21518 fprintf (f, ", ");
21519
21520 pushed_words++;
21521 }
21522 }
21523
21524 if (mask & (1 << PC_REGNUM))
21525 {
21526 /* Catch popping the PC. */
21527 if (TARGET_INTERWORK || TARGET_BACKTRACE
21528 || crtl->calls_eh_return)
21529 {
21530 /* The PC is never popped directly; instead
21531 it is popped into r3 and then BX is used. */
21532 fprintf (f, "}\n");
21533
21534 thumb_exit (f, -1);
21535
21536 return;
21537 }
21538 else
21539 {
21540 if (mask & 0xFF)
21541 fprintf (f, ", ");
21542
21543 asm_fprintf (f, "%r", PC_REGNUM);
21544 }
21545 }
21546
21547 fprintf (f, "}\n");
21548 }
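
/* For example (illustrative only), with MASK covering r4, r5 and the PC on
a target without interworking, backtracing or __builtin_eh_return, this
prints

pop {r4, r5, pc}

whereas with interworking enabled the PC is left out of the pop and
thumb_exit is used to pop the return address into a work register and
return with BX. */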
21549
21550 /* Generate code to return from a thumb function.
21551 If 'reg_containing_return_addr' is -1, then the return address is
21552 actually on the stack, at the stack pointer. */
21553 static void
21554 thumb_exit (FILE *f, int reg_containing_return_addr)
21555 {
21556 unsigned regs_available_for_popping;
21557 unsigned regs_to_pop;
21558 int pops_needed;
21559 unsigned available;
21560 unsigned required;
21561 int mode;
21562 int size;
21563 int restore_a4 = FALSE;
21564
21565 /* Compute the registers we need to pop. */
21566 regs_to_pop = 0;
21567 pops_needed = 0;
21568
21569 if (reg_containing_return_addr == -1)
21570 {
21571 regs_to_pop |= 1 << LR_REGNUM;
21572 ++pops_needed;
21573 }
21574
21575 if (TARGET_BACKTRACE)
21576 {
21577 /* Restore the (ARM) frame pointer and stack pointer. */
21578 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
21579 pops_needed += 2;
21580 }
21581
21582 /* If there is nothing to pop then just emit the BX instruction and
21583 return. */
21584 if (pops_needed == 0)
21585 {
21586 if (crtl->calls_eh_return)
21587 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21588
21589 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21590 return;
21591 }
21592 /* Otherwise if we are not supporting interworking and we have not created
21593 a backtrace structure and the function was not entered in ARM mode then
21594 just pop the return address straight into the PC. */
21595 else if (!TARGET_INTERWORK
21596 && !TARGET_BACKTRACE
21597 && !is_called_in_ARM_mode (current_function_decl)
21598 && !crtl->calls_eh_return)
21599 {
21600 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
21601 return;
21602 }
21603
21604 /* Find out how many of the (return) argument registers we can corrupt. */
21605 regs_available_for_popping = 0;
21606
21607 /* If returning via __builtin_eh_return, the bottom three registers
21608 all contain information needed for the return. */
21609 if (crtl->calls_eh_return)
21610 size = 12;
21611 else
21612 {
21613 /* We can deduce the registers used from the function's
21614 return value. This is more reliable than examining
21615 df_regs_ever_live_p () because that will be set if the register is
21616 ever used in the function, not just if the register is used
21617 to hold a return value. */
21618
21619 if (crtl->return_rtx != 0)
21620 mode = GET_MODE (crtl->return_rtx);
21621 else
21622 mode = DECL_MODE (DECL_RESULT (current_function_decl));
21623
21624 size = GET_MODE_SIZE (mode);
21625
21626 if (size == 0)
21627 {
21628 /* In a void function we can use any argument register.
21629 In a function that returns a structure on the stack
21630 we can use the second and third argument registers. */
21631 if (mode == VOIDmode)
21632 regs_available_for_popping =
21633 (1 << ARG_REGISTER (1))
21634 | (1 << ARG_REGISTER (2))
21635 | (1 << ARG_REGISTER (3));
21636 else
21637 regs_available_for_popping =
21638 (1 << ARG_REGISTER (2))
21639 | (1 << ARG_REGISTER (3));
21640 }
21641 else if (size <= 4)
21642 regs_available_for_popping =
21643 (1 << ARG_REGISTER (2))
21644 | (1 << ARG_REGISTER (3));
21645 else if (size <= 8)
21646 regs_available_for_popping =
21647 (1 << ARG_REGISTER (3));
21648 }
21649
21650 /* Match registers to be popped with registers into which we pop them. */
21651 for (available = regs_available_for_popping,
21652 required = regs_to_pop;
21653 required != 0 && available != 0;
21654 available &= ~(available & - available),
21655 required &= ~(required & - required))
21656 -- pops_needed;
21657
21658 /* If we have any popping registers left over, remove them. */
21659 if (available > 0)
21660 regs_available_for_popping &= ~available;
21661
21662 /* Otherwise if we need another popping register we can use
21663 the fourth argument register. */
21664 else if (pops_needed)
21665 {
21666 /* If we have not found any free argument registers and
21667 reg a4 contains the return address, we must move it. */
21668 if (regs_available_for_popping == 0
21669 && reg_containing_return_addr == LAST_ARG_REGNUM)
21670 {
21671 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21672 reg_containing_return_addr = LR_REGNUM;
21673 }
21674 else if (size > 12)
21675 {
21676 /* Register a4 is being used to hold part of the return value,
21677 but we have dire need of a free, low register. */
21678 restore_a4 = TRUE;
21679
21680 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
21681 }
21682
21683 if (reg_containing_return_addr != LAST_ARG_REGNUM)
21684 {
21685 /* The fourth argument register is available. */
21686 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
21687
21688 --pops_needed;
21689 }
21690 }
21691
21692 /* Pop as many registers as we can. */
21693 thumb_pop (f, regs_available_for_popping);
21694
21695 /* Process the registers we popped. */
21696 if (reg_containing_return_addr == -1)
21697 {
21698 /* The return address was popped into the lowest numbered register. */
21699 regs_to_pop &= ~(1 << LR_REGNUM);
21700
21701 reg_containing_return_addr =
21702 number_of_first_bit_set (regs_available_for_popping);
21703
21704 /* Remove this register from the mask of available registers, so that
21705 the return address will not be corrupted by further pops. */
21706 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
21707 }
21708
21709 /* If we popped other registers then handle them here. */
21710 if (regs_available_for_popping)
21711 {
21712 int frame_pointer;
21713
21714 /* Work out which register currently contains the frame pointer. */
21715 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
21716
21717 /* Move it into the correct place. */
21718 asm_fprintf (f, "\tmov\t%r, %r\n",
21719 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
21720
21721 /* (Temporarily) remove it from the mask of popped registers. */
21722 regs_available_for_popping &= ~(1 << frame_pointer);
21723 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
21724
21725 if (regs_available_for_popping)
21726 {
21727 int stack_pointer;
21728
21729 /* We popped the stack pointer as well,
21730 find the register that contains it. */
21731 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
21732
21733 /* Move it into the stack register. */
21734 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
21735
21736 /* At this point we have popped all necessary registers, so
21737 do not worry about restoring regs_available_for_popping
21738 to its correct value:
21739
21740 assert (pops_needed == 0)
21741 assert (regs_available_for_popping == (1 << frame_pointer))
21742 assert (regs_to_pop == (1 << STACK_POINTER)) */
21743 }
21744 else
21745 {
21746 /* Since we have just moved the popped value into the frame
21747 pointer, the popping register is available for reuse, and
21748 we know that we still have the stack pointer left to pop. */
21749 regs_available_for_popping |= (1 << frame_pointer);
21750 }
21751 }
21752
21753 /* If we still have registers left on the stack, but we no longer have
21754 any registers into which we can pop them, then we must move the return
21755 address into the link register and make available the register that
21756 contained it. */
21757 if (regs_available_for_popping == 0 && pops_needed > 0)
21758 {
21759 regs_available_for_popping |= 1 << reg_containing_return_addr;
21760
21761 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
21762 reg_containing_return_addr);
21763
21764 reg_containing_return_addr = LR_REGNUM;
21765 }
21766
21767 /* If we have registers left on the stack then pop some more.
21768 We know that at most we will want to pop FP and SP. */
21769 if (pops_needed > 0)
21770 {
21771 int popped_into;
21772 int move_to;
21773
21774 thumb_pop (f, regs_available_for_popping);
21775
21776 /* We have popped either FP or SP.
21777 Move whichever one it is into the correct register. */
21778 popped_into = number_of_first_bit_set (regs_available_for_popping);
21779 move_to = number_of_first_bit_set (regs_to_pop);
21780
21781 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
21782
21783 regs_to_pop &= ~(1 << move_to);
21784
21785 --pops_needed;
21786 }
21787
21788 /* If we still have not popped everything then we must have only
21789 had one register available to us and we are now popping the SP. */
21790 if (pops_needed > 0)
21791 {
21792 int popped_into;
21793
21794 thumb_pop (f, regs_available_for_popping);
21795
21796 popped_into = number_of_first_bit_set (regs_available_for_popping);
21797
21798 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
21799 /*
21800 assert (regs_to_pop == (1 << STACK_POINTER))
21801 assert (pops_needed == 1)
21802 */
21803 }
21804
21805 /* If necessary restore the a4 register. */
21806 if (restore_a4)
21807 {
21808 if (reg_containing_return_addr != LR_REGNUM)
21809 {
21810 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21811 reg_containing_return_addr = LR_REGNUM;
21812 }
21813
21814 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
21815 }
21816
21817 if (crtl->calls_eh_return)
21818 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21819
21820 /* Return to caller. */
21821 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21822 }
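
/* As a concrete illustration of the simplest path through thumb_exit: for a
plain function with the return address on the stack and no interworking,
backtrace structure or __builtin_eh_return handling, all that is emitted is

pop {pc}

The longer paths stage the popped values (return address, and possibly FP
and SP) through low registers before the final BX. */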
21823 \f
21824 /* Scan INSN just before assembler is output for it.
21825 For Thumb-1, we track the status of the condition codes; this
21826 information is used in the cbranchsi4_insn pattern. */
21827 void
21828 thumb1_final_prescan_insn (rtx insn)
21829 {
21830 if (flag_print_asm_name)
21831 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
21832 INSN_ADDRESSES (INSN_UID (insn)));
21833 /* Don't overwrite the previous setter when we get to a cbranch. */
21834 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
21835 {
21836 enum attr_conds conds;
21837
21838 if (cfun->machine->thumb1_cc_insn)
21839 {
21840 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
21841 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
21842 CC_STATUS_INIT;
21843 }
21844 conds = get_attr_conds (insn);
21845 if (conds == CONDS_SET)
21846 {
21847 rtx set = single_set (insn);
21848 cfun->machine->thumb1_cc_insn = insn;
21849 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
21850 cfun->machine->thumb1_cc_op1 = const0_rtx;
21851 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
21852 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
21853 {
21854 rtx src1 = XEXP (SET_SRC (set), 1);
21855 if (src1 == const0_rtx)
21856 cfun->machine->thumb1_cc_mode = CCmode;
21857 }
21858 }
21859 else if (conds != CONDS_NOCOND)
21860 cfun->machine->thumb1_cc_insn = NULL_RTX;
21861 }
21862 }
21863
21864 int
21865 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
21866 {
21867 unsigned HOST_WIDE_INT mask = 0xff;
21868 int i;
21869
21870 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
21871 if (val == 0) /* XXX */
21872 return 0;
21873
21874 for (i = 0; i < 25; i++)
21875 if ((val & (mask << i)) == val)
21876 return 1;
21877
21878 return 0;
21879 }
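
/* For example, thumb_shiftable_const accepts 0x1FE (0xFF shifted left by 1)
and 0xFF000000 (0xFF shifted left by 24), but rejects 0x101, whose set bits
do not fit in any window of eight consecutive bit positions, and rejects 0
outright. */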
21880
21881 /* Returns nonzero if the current function contains,
21882 or might contain, a far jump. */
21883 static int
21884 thumb_far_jump_used_p (void)
21885 {
21886 rtx insn;
21887
21888 /* This test is only important for leaf functions. */
21889 /* assert (!leaf_function_p ()); */
21890
21891 /* If we have already decided that far jumps may be used,
21892 do not bother checking again, and always return true even if
21893 it turns out that they are not being used. Once we have made
21894 the decision that far jumps are present (and that hence the link
21895 register will be pushed onto the stack) we cannot go back on it. */
21896 if (cfun->machine->far_jump_used)
21897 return 1;
21898
21899 /* If this function is not being called from the prologue/epilogue
21900 generation code then it must be being called from the
21901 INITIAL_ELIMINATION_OFFSET macro. */
21902 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
21903 {
21904 /* In this case we know that we are being asked about the elimination
21905 of the arg pointer register. If that register is not being used,
21906 then there are no arguments on the stack, and we do not have to
21907 worry that a far jump might force the prologue to push the link
21908 register, changing the stack offsets. In this case we can just
21909 return false, since the presence of far jumps in the function will
21910 not affect stack offsets.
21911
21912 If the arg pointer is live (or if it was live, but has now been
21913 eliminated and so set to dead) then we do have to test to see if
21914 the function might contain a far jump. This test can lead to some
21915 false positives, since before reload is completed the length of
21916 branch instructions is not known, so gcc defaults to returning their
21917 longest length, which in turn sets the far jump attribute to true.
21918
21919 A false positive will not result in bad code being generated, but it
21920 will result in a needless push and pop of the link register. We
21921 hope that this does not occur too often.
21922
21923 If we need doubleword stack alignment this could affect the other
21924 elimination offsets so we can't risk getting it wrong. */
21925 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
21926 cfun->machine->arg_pointer_live = 1;
21927 else if (!cfun->machine->arg_pointer_live)
21928 return 0;
21929 }
21930
21931 /* Check to see if the function contains a branch
21932 insn with the far jump attribute set. */
21933 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21934 {
21935 if (JUMP_P (insn)
21936 /* Ignore tablejump patterns. */
21937 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21938 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
21939 && get_attr_far_jump (insn) == FAR_JUMP_YES
21940 )
21941 {
21942 /* Record the fact that we have decided that
21943 the function does use far jumps. */
21944 cfun->machine->far_jump_used = 1;
21945 return 1;
21946 }
21947 }
21948
21949 return 0;
21950 }
21951
21952 /* Return nonzero if FUNC must be entered in ARM mode. */
21953 int
21954 is_called_in_ARM_mode (tree func)
21955 {
21956 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
21957
21958 /* Ignore the problem about functions whose address is taken. */
21959 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
21960 return TRUE;
21961
21962 #ifdef ARM_PE
21963 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
21964 #else
21965 return FALSE;
21966 #endif
21967 }
21968
21969 /* Given the stack offsets and register mask in OFFSETS, decide how
21970 many additional registers to push instead of subtracting a constant
21971 from SP. For epilogues the principle is the same except we use pop.
21972 FOR_PROLOGUE indicates which we're generating. */
21973 static int
21974 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
21975 {
21976 HOST_WIDE_INT amount;
21977 unsigned long live_regs_mask = offsets->saved_regs_mask;
21978 /* Extract a mask of the ones we can give to the Thumb's push/pop
21979 instruction. */
21980 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
21981 /* Then count how many other high registers will need to be pushed. */
21982 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21983 int n_free, reg_base, size;
21984
21985 if (!for_prologue && frame_pointer_needed)
21986 amount = offsets->locals_base - offsets->saved_regs;
21987 else
21988 amount = offsets->outgoing_args - offsets->saved_regs;
21989
21990 /* If the stack frame size is 512 exactly, we can save one load
21991 instruction, which should make this a win even when optimizing
21992 for speed. */
21993 if (!optimize_size && amount != 512)
21994 return 0;
21995
21996 /* Can't do this if there are high registers to push. */
21997 if (high_regs_pushed != 0)
21998 return 0;
21999
22000 /* Shouldn't do it in the prologue if no registers would normally
22001 be pushed at all. In the epilogue, also allow it if we'll have
22002 a pop insn for the PC. */
22003 if (l_mask == 0
22004 && (for_prologue
22005 || TARGET_BACKTRACE
22006 || (live_regs_mask & 1 << LR_REGNUM) == 0
22007 || TARGET_INTERWORK
22008 || crtl->args.pretend_args_size != 0))
22009 return 0;
22010
22011 /* Don't do this if thumb_expand_prologue wants to emit instructions
22012 between the push and the stack frame allocation. */
22013 if (for_prologue
22014 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
22015 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
22016 return 0;
22017
22018 reg_base = 0;
22019 n_free = 0;
22020 if (!for_prologue)
22021 {
22022 size = arm_size_return_regs ();
22023 reg_base = ARM_NUM_INTS (size);
22024 live_regs_mask >>= reg_base;
22025 }
22026
22027 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
22028 && (for_prologue || call_used_regs[reg_base + n_free]))
22029 {
22030 live_regs_mask >>= 1;
22031 n_free++;
22032 }
22033
22034 if (n_free == 0)
22035 return 0;
22036 gcc_assert (amount / 4 * 4 == amount);
22037
22038 if (amount >= 512 && (amount - n_free * 4) < 512)
22039 return (amount - 508) / 4;
22040 if (amount <= n_free * 4)
22041 return amount / 4;
22042 return 0;
22043 }
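
/* Worked example (illustrative only): with a frame of exactly 512 bytes, no
high registers to save and one argument register free, this returns 1, so
one extra register is pushed and the remaining 508-byte adjustment still
fits the single-instruction case in thumb1_expand_prologue. Conversely, if
the whole frame fits in the freed registers (amount <= n_free * 4), the
explicit stack adjustment disappears entirely. */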
22044
22045 /* The bits which aren't usefully expanded as rtl. */
22046 const char *
22047 thumb1_unexpanded_epilogue (void)
22048 {
22049 arm_stack_offsets *offsets;
22050 int regno;
22051 unsigned long live_regs_mask = 0;
22052 int high_regs_pushed = 0;
22053 int extra_pop;
22054 int had_to_push_lr;
22055 int size;
22056
22057 if (cfun->machine->return_used_this_function != 0)
22058 return "";
22059
22060 if (IS_NAKED (arm_current_func_type ()))
22061 return "";
22062
22063 offsets = arm_get_frame_offsets ();
22064 live_regs_mask = offsets->saved_regs_mask;
22065 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
22066
22067 /* We can deduce the registers used from the function's return value.
22068 This is more reliable than examining df_regs_ever_live_p () because that
22069 will be set if the register is ever used in the function, not just if
22070 the register is used to hold a return value. */
22071 size = arm_size_return_regs ();
22072
22073 extra_pop = thumb1_extra_regs_pushed (offsets, false);
22074 if (extra_pop > 0)
22075 {
22076 unsigned long extra_mask = (1 << extra_pop) - 1;
22077 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
22078 }
22079
22080 /* The prologue may have pushed some high registers to use as
22081 work registers. E.g. the testsuite file:
22082 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
22083 compiles to produce:
22084 push {r4, r5, r6, r7, lr}
22085 mov r7, r9
22086 mov r6, r8
22087 push {r6, r7}
22088 as part of the prologue. We have to undo that pushing here. */
22089
22090 if (high_regs_pushed)
22091 {
22092 unsigned long mask = live_regs_mask & 0xff;
22093 int next_hi_reg;
22094
22095 /* The available low registers depend on the size of the value we are
22096 returning. */
22097 if (size <= 12)
22098 mask |= 1 << 3;
22099 if (size <= 8)
22100 mask |= 1 << 2;
22101
22102 if (mask == 0)
22103 /* Oh dear! We have no low registers into which we can pop
22104 high registers! */
22105 internal_error
22106 ("no low registers available for popping high registers");
22107
22108 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
22109 if (live_regs_mask & (1 << next_hi_reg))
22110 break;
22111
22112 while (high_regs_pushed)
22113 {
22114 /* Find lo register(s) into which the high register(s) can
22115 be popped. */
22116 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
22117 {
22118 if (mask & (1 << regno))
22119 high_regs_pushed--;
22120 if (high_regs_pushed == 0)
22121 break;
22122 }
22123
22124 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
22125
22126 /* Pop the values into the low register(s). */
22127 thumb_pop (asm_out_file, mask);
22128
22129 /* Move the value(s) into the high registers. */
22130 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
22131 {
22132 if (mask & (1 << regno))
22133 {
22134 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
22135 regno);
22136
22137 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
22138 if (live_regs_mask & (1 << next_hi_reg))
22139 break;
22140 }
22141 }
22142 }
22143 live_regs_mask &= ~0x0f00;
22144 }
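
/* For instance (illustrative only), if the prologue saved r8 and r9 and the
return value needs no more than two registers, the code above might emit

pop {r2, r3}
mov r8, r2
mov r9, r3

mirroring the push sequence shown in the comment above. */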
22145
22146 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
22147 live_regs_mask &= 0xff;
22148
22149 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
22150 {
22151 /* Pop the return address into the PC. */
22152 if (had_to_push_lr)
22153 live_regs_mask |= 1 << PC_REGNUM;
22154
22155 /* Either no argument registers were pushed or a backtrace
22156 structure was created which includes an adjusted stack
22157 pointer, so just pop everything. */
22158 if (live_regs_mask)
22159 thumb_pop (asm_out_file, live_regs_mask);
22160
22161 /* We have either just popped the return address into the
22162 PC or it was kept in LR for the entire function.
22163 Note that thumb_pop has already called thumb_exit if the
22164 PC was in the list. */
22165 if (!had_to_push_lr)
22166 thumb_exit (asm_out_file, LR_REGNUM);
22167 }
22168 else
22169 {
22170 /* Pop everything but the return address. */
22171 if (live_regs_mask)
22172 thumb_pop (asm_out_file, live_regs_mask);
22173
22174 if (had_to_push_lr)
22175 {
22176 if (size > 12)
22177 {
22178 /* We have no free low regs, so save one. */
22179 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
22180 LAST_ARG_REGNUM);
22181 }
22182
22183 /* Get the return address into a temporary register. */
22184 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
22185
22186 if (size > 12)
22187 {
22188 /* Move the return address to lr. */
22189 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
22190 LAST_ARG_REGNUM);
22191 /* Restore the low register. */
22192 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
22193 IP_REGNUM);
22194 regno = LR_REGNUM;
22195 }
22196 else
22197 regno = LAST_ARG_REGNUM;
22198 }
22199 else
22200 regno = LR_REGNUM;
22201
22202 /* Remove the argument registers that were pushed onto the stack. */
22203 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
22204 SP_REGNUM, SP_REGNUM,
22205 crtl->args.pretend_args_size);
22206
22207 thumb_exit (asm_out_file, regno);
22208 }
22209
22210 return "";
22211 }
22212
22213 /* Functions to save and restore machine-specific function data. */
22214 static struct machine_function *
22215 arm_init_machine_status (void)
22216 {
22217 struct machine_function *machine;
22218 machine = ggc_alloc_cleared_machine_function ();
22219
22220 #if ARM_FT_UNKNOWN != 0
22221 machine->func_type = ARM_FT_UNKNOWN;
22222 #endif
22223 return machine;
22224 }
22225
22226 /* Return an RTX indicating where the return address to the
22227 calling function can be found. */
22228 rtx
22229 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
22230 {
22231 if (count != 0)
22232 return NULL_RTX;
22233
22234 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
22235 }
22236
22237 /* Do anything needed before RTL is emitted for each function. */
22238 void
22239 arm_init_expanders (void)
22240 {
22241 /* Arrange to initialize and mark the machine per-function status. */
22242 init_machine_status = arm_init_machine_status;
22243
22244 /* This is to stop the combine pass optimizing away the alignment
22245 adjustment of va_arg. */
22246 /* ??? It is claimed that this should not be necessary. */
22247 if (cfun)
22248 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
22249 }
22250
22251
22252 /* Like arm_compute_initial_elimination_offset. Simpler because there
22253 isn't an ABI specified frame pointer for Thumb. Instead, we set it
22254 to point at the base of the local variables after static stack
22255 space for a function has been allocated. */
22256
22257 HOST_WIDE_INT
22258 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
22259 {
22260 arm_stack_offsets *offsets;
22261
22262 offsets = arm_get_frame_offsets ();
22263
22264 switch (from)
22265 {
22266 case ARG_POINTER_REGNUM:
22267 switch (to)
22268 {
22269 case STACK_POINTER_REGNUM:
22270 return offsets->outgoing_args - offsets->saved_args;
22271
22272 case FRAME_POINTER_REGNUM:
22273 return offsets->soft_frame - offsets->saved_args;
22274
22275 case ARM_HARD_FRAME_POINTER_REGNUM:
22276 return offsets->saved_regs - offsets->saved_args;
22277
22278 case THUMB_HARD_FRAME_POINTER_REGNUM:
22279 return offsets->locals_base - offsets->saved_args;
22280
22281 default:
22282 gcc_unreachable ();
22283 }
22284 break;
22285
22286 case FRAME_POINTER_REGNUM:
22287 switch (to)
22288 {
22289 case STACK_POINTER_REGNUM:
22290 return offsets->outgoing_args - offsets->soft_frame;
22291
22292 case ARM_HARD_FRAME_POINTER_REGNUM:
22293 return offsets->saved_regs - offsets->soft_frame;
22294
22295 case THUMB_HARD_FRAME_POINTER_REGNUM:
22296 return offsets->locals_base - offsets->soft_frame;
22297
22298 default:
22299 gcc_unreachable ();
22300 }
22301 break;
22302
22303 default:
22304 gcc_unreachable ();
22305 }
22306 }
22307
22308 /* Generate the function's prologue. */
22309
22310 void
22311 thumb1_expand_prologue (void)
22312 {
22313 rtx insn;
22314
22315 HOST_WIDE_INT amount;
22316 arm_stack_offsets *offsets;
22317 unsigned long func_type;
22318 int regno;
22319 unsigned long live_regs_mask;
22320 unsigned long l_mask;
22321 unsigned high_regs_pushed = 0;
22322
22323 func_type = arm_current_func_type ();
22324
22325 /* Naked functions don't have prologues. */
22326 if (IS_NAKED (func_type))
22327 return;
22328
22329 if (IS_INTERRUPT (func_type))
22330 {
22331 error ("interrupt service routines cannot be coded in Thumb mode");
22332 return;
22333 }
22334
22335 if (is_called_in_ARM_mode (current_function_decl))
22336 emit_insn (gen_prologue_thumb1_interwork ());
22337
22338 offsets = arm_get_frame_offsets ();
22339 live_regs_mask = offsets->saved_regs_mask;
22340
22341 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
22342 l_mask = live_regs_mask & 0x40ff;
22343 /* Then count how many other high registers will need to be pushed. */
22344 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
22345
22346 if (crtl->args.pretend_args_size)
22347 {
22348 rtx x = GEN_INT (-crtl->args.pretend_args_size);
22349
22350 if (cfun->machine->uses_anonymous_args)
22351 {
22352 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
22353 unsigned long mask;
22354
22355 mask = 1ul << (LAST_ARG_REGNUM + 1);
22356 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
22357
22358 insn = thumb1_emit_multi_reg_push (mask, 0);
22359 }
22360 else
22361 {
22362 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22363 stack_pointer_rtx, x));
22364 }
22365 RTX_FRAME_RELATED_P (insn) = 1;
22366 }
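
/* For example (illustrative only), 8 bytes of pretend arguments for a
varargs function give num_pushes == 2, so the mask covers r2 and r3 and the
prologue pushes those two registers; since REAL_REGS is 0, the unwind
information records only the 8-byte stack adjustment. */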
22367
22368 if (TARGET_BACKTRACE)
22369 {
22370 HOST_WIDE_INT offset = 0;
22371 unsigned work_register;
22372 rtx work_reg, x, arm_hfp_rtx;
22373
22374 /* We have been asked to create a stack backtrace structure.
22375 The code looks like this:
22376
22377 0 .align 2
22378 0 func:
22379 0 sub SP, #16 Reserve space for 4 registers.
22380 2 push {R7} Push low registers.
22381 4 add R7, SP, #20 Get the stack pointer before the push.
22382 6 str R7, [SP, #8] Store the stack pointer
22383 (before reserving the space).
22384 8 mov R7, PC Get hold of the start of this code + 12.
22385 10 str R7, [SP, #16] Store it.
22386 12 mov R7, FP Get hold of the current frame pointer.
22387 14 str R7, [SP, #4] Store it.
22388 16 mov R7, LR Get hold of the current return address.
22389 18 str R7, [SP, #12] Store it.
22390 20 add R7, SP, #16 Point at the start of the
22391 backtrace structure.
22392 22 mov FP, R7 Put this value into the frame pointer. */
22393
22394 work_register = thumb_find_work_register (live_regs_mask);
22395 work_reg = gen_rtx_REG (SImode, work_register);
22396 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
22397
22398 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22399 stack_pointer_rtx, GEN_INT (-16)));
22400 RTX_FRAME_RELATED_P (insn) = 1;
22401
22402 if (l_mask)
22403 {
22404 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
22405 RTX_FRAME_RELATED_P (insn) = 1;
22406
22407 offset = bit_count (l_mask) * UNITS_PER_WORD;
22408 }
22409
22410 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
22411 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22412
22413 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
22414 x = gen_frame_mem (SImode, x);
22415 emit_move_insn (x, work_reg);
22416
22417 /* Make sure that the instruction fetching the PC is in the right place
22418 to calculate "start of backtrace creation code + 12". */
22419 /* ??? The stores using the common WORK_REG ought to be enough to
22420 prevent the scheduler from doing anything weird. Failing that
22421 we could always move all of the following into an UNSPEC_VOLATILE. */
22422 if (l_mask)
22423 {
22424 x = gen_rtx_REG (SImode, PC_REGNUM);
22425 emit_move_insn (work_reg, x);
22426
22427 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
22428 x = gen_frame_mem (SImode, x);
22429 emit_move_insn (x, work_reg);
22430
22431 emit_move_insn (work_reg, arm_hfp_rtx);
22432
22433 x = plus_constant (Pmode, stack_pointer_rtx, offset);
22434 x = gen_frame_mem (SImode, x);
22435 emit_move_insn (x, work_reg);
22436 }
22437 else
22438 {
22439 emit_move_insn (work_reg, arm_hfp_rtx);
22440
22441 x = plus_constant (Pmode, stack_pointer_rtx, offset);
22442 x = gen_frame_mem (SImode, x);
22443 emit_move_insn (x, work_reg);
22444
22445 x = gen_rtx_REG (SImode, PC_REGNUM);
22446 emit_move_insn (work_reg, x);
22447
22448 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
22449 x = gen_frame_mem (SImode, x);
22450 emit_move_insn (x, work_reg);
22451 }
22452
22453 x = gen_rtx_REG (SImode, LR_REGNUM);
22454 emit_move_insn (work_reg, x);
22455
22456 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
22457 x = gen_frame_mem (SImode, x);
22458 emit_move_insn (x, work_reg);
22459
22460 x = GEN_INT (offset + 12);
22461 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22462
22463 emit_move_insn (arm_hfp_rtx, work_reg);
22464 }
22465 /* Optimization: If we are not pushing any low registers but we are going
22466 to push some high registers then delay our first push. This will just
22467 be a push of LR and we can combine it with the push of the first high
22468 register. */
22469 else if ((l_mask & 0xff) != 0
22470 || (high_regs_pushed == 0 && l_mask))
22471 {
22472 unsigned long mask = l_mask;
22473 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
22474 insn = thumb1_emit_multi_reg_push (mask, mask);
22475 RTX_FRAME_RELATED_P (insn) = 1;
22476 }
22477
22478 if (high_regs_pushed)
22479 {
22480 unsigned pushable_regs;
22481 unsigned next_hi_reg;
22482
22483 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
22484 if (live_regs_mask & (1 << next_hi_reg))
22485 break;
22486
22487 pushable_regs = l_mask & 0xff;
22488
22489 if (pushable_regs == 0)
22490 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
22491
22492 while (high_regs_pushed > 0)
22493 {
22494 unsigned long real_regs_mask = 0;
22495
22496 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
22497 {
22498 if (pushable_regs & (1 << regno))
22499 {
22500 emit_move_insn (gen_rtx_REG (SImode, regno),
22501 gen_rtx_REG (SImode, next_hi_reg));
22502
22503 high_regs_pushed --;
22504 real_regs_mask |= (1 << next_hi_reg);
22505
22506 if (high_regs_pushed)
22507 {
22508 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
22509 next_hi_reg --)
22510 if (live_regs_mask & (1 << next_hi_reg))
22511 break;
22512 }
22513 else
22514 {
22515 pushable_regs &= ~((1 << regno) - 1);
22516 break;
22517 }
22518 }
22519 }
22520
22521 /* If we had to find a work register and we have not yet
22522 saved the LR then add it to the list of regs to push. */
22523 if (l_mask == (1 << LR_REGNUM))
22524 {
22525 pushable_regs |= l_mask;
22526 real_regs_mask |= l_mask;
22527 l_mask = 0;
22528 }
22529
22530 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
22531 RTX_FRAME_RELATED_P (insn) = 1;
22532 }
22533 }
22534
22535 /* Load the pic register before setting the frame pointer,
22536 so we can use r7 as a temporary work register. */
22537 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22538 arm_load_pic_register (live_regs_mask);
22539
22540 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
22541 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
22542 stack_pointer_rtx);
22543
22544 if (flag_stack_usage_info)
22545 current_function_static_stack_size
22546 = offsets->outgoing_args - offsets->saved_args;
22547
22548 amount = offsets->outgoing_args - offsets->saved_regs;
22549 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
22550 if (amount)
22551 {
22552 if (amount < 512)
22553 {
22554 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22555 GEN_INT (- amount)));
22556 RTX_FRAME_RELATED_P (insn) = 1;
22557 }
22558 else
22559 {
22560 rtx reg, dwarf;
22561
22562 /* The stack decrement is too big for an immediate value in a single
22563 insn. In theory we could issue multiple subtracts, but after
22564 three of them it becomes more space efficient to place the full
22565 value in the constant pool and load it into a register. (Also, the
22566 ARM debugger really likes to see only one stack decrement per
22567 function.) So instead we look for a scratch register into which
22568 we can load the decrement, and then we subtract this from the
22569 stack pointer. Unfortunately, on Thumb the only available
22570 scratch registers are the argument registers, and we cannot use
22571 these as they may hold arguments to the function. Instead we
22572 attempt to locate a call preserved register which is used by this
22573 function. If we can find one, then we know that it will have
22574 been pushed at the start of the prologue and so we can corrupt
22575 it now. */
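
/* Roughly (illustrative only), for a 1024-byte frame in which r4 was
pushed, the resulting sequence is something like

ldr r4, .Lnnn @ load -1024 from the literal pool
add sp, r4

with the true stack adjustment recorded separately in the
REG_FRAME_RELATED_EXPR note built below. */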
22576 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
22577 if (live_regs_mask & (1 << regno))
22578 break;
22579
22580 gcc_assert (regno <= LAST_LO_REGNUM);
22581
22582 reg = gen_rtx_REG (SImode, regno);
22583
22584 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
22585
22586 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22587 stack_pointer_rtx, reg));
22588
22589 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
22590 plus_constant (Pmode, stack_pointer_rtx,
22591 -amount));
22592 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22593 RTX_FRAME_RELATED_P (insn) = 1;
22594 }
22595 }
22596
22597 if (frame_pointer_needed)
22598 thumb_set_frame_pointer (offsets);
22599
22600 /* If we are profiling, make sure no instructions are scheduled before
22601 the call to mcount. Similarly if the user has requested no
22602 scheduling in the prologue. Similarly if we want non-call exceptions
22603 using the EABI unwinder, to prevent faulting instructions from being
22604 swapped with a stack adjustment. */
22605 if (crtl->profile || !TARGET_SCHED_PROLOG
22606 || (arm_except_unwind_info (&global_options) == UI_TARGET
22607 && cfun->can_throw_non_call_exceptions))
22608 emit_insn (gen_blockage ());
22609
22610 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
22611 if (live_regs_mask & 0xff)
22612 cfun->machine->lr_save_eliminated = 0;
22613 }
22614
22615 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
22616 single POP instruction can be generated. LR should be replaced by PC. All
22617 the checks required are already done by USE_RETURN_INSN (). Hence, all we
22618 really need to check here is whether a single register or multiple
22619 registers are to be popped on return. */
22620 void
22621 thumb2_expand_return (void)
22622 {
22623 int i, num_regs;
22624 unsigned long saved_regs_mask;
22625 arm_stack_offsets *offsets;
22626
22627 offsets = arm_get_frame_offsets ();
22628 saved_regs_mask = offsets->saved_regs_mask;
22629
22630 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
22631 if (saved_regs_mask & (1 << i))
22632 num_regs++;
22633
22634 if (saved_regs_mask)
22635 {
22636 if (num_regs == 1)
22637 {
22638 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22639 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
22640 rtx addr = gen_rtx_MEM (SImode,
22641 gen_rtx_POST_INC (SImode,
22642 stack_pointer_rtx));
22643 set_mem_alias_set (addr, get_frame_alias_set ());
22644 XVECEXP (par, 0, 0) = ret_rtx;
22645 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
22646 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
22647 emit_jump_insn (par);
22648 }
22649 else
22650 {
22651 saved_regs_mask &= ~ (1 << LR_REGNUM);
22652 saved_regs_mask |= (1 << PC_REGNUM);
22653 arm_emit_multi_reg_pop (saved_regs_mask);
22654 }
22655 }
22656 else
22657 {
22658 emit_jump_insn (simple_return_rtx);
22659 }
22660 }
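
/* Illustratively (the exact assembly depends on the output templates): a
function that saved only LR takes the single-register path above and pops
the return address straight into the PC, while one that saved r4, r5 and LR
takes the multi-register path and returns with something like
"pop {r4, r5, pc}". */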
22661
22662 void
22663 thumb1_expand_epilogue (void)
22664 {
22665 HOST_WIDE_INT amount;
22666 arm_stack_offsets *offsets;
22667 int regno;
22668
22669 /* Naked functions don't have epilogues. */
22670 if (IS_NAKED (arm_current_func_type ()))
22671 return;
22672
22673 offsets = arm_get_frame_offsets ();
22674 amount = offsets->outgoing_args - offsets->saved_regs;
22675
22676 if (frame_pointer_needed)
22677 {
22678 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
22679 amount = offsets->locals_base - offsets->saved_regs;
22680 }
22681 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
22682
22683 gcc_assert (amount >= 0);
22684 if (amount)
22685 {
22686 emit_insn (gen_blockage ());
22687
22688 if (amount < 512)
22689 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22690 GEN_INT (amount)));
22691 else
22692 {
22693 /* r3 is always free in the epilogue. */
22694 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
22695
22696 emit_insn (gen_movsi (reg, GEN_INT (amount)));
22697 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
22698 }
22699 }
22700
22701 /* Emit a USE (stack_pointer_rtx), so that
22702 the stack adjustment will not be deleted. */
22703 emit_insn (gen_force_register_use (stack_pointer_rtx));
22704
22705 if (crtl->profile || !TARGET_SCHED_PROLOG)
22706 emit_insn (gen_blockage ());
22707
22708 /* Emit a clobber for each register that will be restored in the epilogue,
22709 so that flow2 will get register lifetimes correct. */
22710 for (regno = 0; regno < 13; regno++)
22711 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
22712 emit_clobber (gen_rtx_REG (SImode, regno));
22713
22714 if (! df_regs_ever_live_p (LR_REGNUM))
22715 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
22716 }
22717
22718 /* Epilogue code for APCS frame. */
22719 static void
22720 arm_expand_epilogue_apcs_frame (bool really_return)
22721 {
22722 unsigned long func_type;
22723 unsigned long saved_regs_mask;
22724 int num_regs = 0;
22725 int i;
22726 int floats_from_frame = 0;
22727 arm_stack_offsets *offsets;
22728
22729 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
22730 func_type = arm_current_func_type ();
22731
22732 /* Get frame offsets for ARM. */
22733 offsets = arm_get_frame_offsets ();
22734 saved_regs_mask = offsets->saved_regs_mask;
22735
22736 /* Find the offset of the floating-point save area in the frame. */
22737 floats_from_frame = offsets->saved_args - offsets->frame;
22738
22739 /* Compute how many core registers are saved and how far away the floats are. */
22740 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22741 if (saved_regs_mask & (1 << i))
22742 {
22743 num_regs++;
22744 floats_from_frame += 4;
22745 }
22746
22747 if (TARGET_HARD_FLOAT && TARGET_VFP)
22748 {
22749 int start_reg;
22750
22751 /* The offset is from IP_REGNUM. */
22752 int saved_size = arm_get_vfp_saved_size ();
22753 if (saved_size > 0)
22754 {
22755 floats_from_frame += saved_size;
22756 emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM),
22757 hard_frame_pointer_rtx,
22758 GEN_INT (-floats_from_frame)));
22759 }
22760
22761 /* Generate VFP register multi-pop. */
22762 start_reg = FIRST_VFP_REGNUM;
22763
22764 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
22765 /* Look for a case where a reg does not need restoring. */
22766 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
22767 && (!df_regs_ever_live_p (i + 1)
22768 || call_used_regs[i + 1]))
22769 {
22770 if (start_reg != i)
22771 arm_emit_vfp_multi_reg_pop (start_reg,
22772 (i - start_reg) / 2,
22773 gen_rtx_REG (SImode,
22774 IP_REGNUM));
22775 start_reg = i + 2;
22776 }
22777
22778 /* Restore the remaining regs that we have discovered (or possibly
22779 even all of them, if the conditional in the for loop never
22780 fired). */
22781 if (start_reg != i)
22782 arm_emit_vfp_multi_reg_pop (start_reg,
22783 (i - start_reg) / 2,
22784 gen_rtx_REG (SImode, IP_REGNUM));
22785 }
22786
22787 if (TARGET_IWMMXT)
22788 {
22789 /* The frame pointer is guaranteed to be non-double-word aligned, as
22790 it is set to double-word-aligned old_stack_pointer - 4. */
22791 rtx insn;
22792 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
22793
22794 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
22795 if (df_regs_ever_live_p (i) && !call_used_regs[i])
22796 {
22797 rtx addr = gen_frame_mem (V2SImode,
22798 plus_constant (Pmode, hard_frame_pointer_rtx,
22799 - lrm_count * 4));
22800 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
22801 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22802 gen_rtx_REG (V2SImode, i),
22803 NULL_RTX);
22804 lrm_count += 2;
22805 }
22806 }
22807
22808 /* saved_regs_mask should contain IP, which holds the old stack pointer
22809 at the time the activation record was created. Since SP and IP are adjacent registers,
22810 we can restore the value directly into SP. */
22811 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
22812 saved_regs_mask &= ~(1 << IP_REGNUM);
22813 saved_regs_mask |= (1 << SP_REGNUM);
22814
22815 /* There are two registers left in saved_regs_mask - LR and PC. We
22816 only need to restore LR (the return address), but to
22817 save time we can load it directly into PC, unless we need a
22818 special function exit sequence, or we are not really returning. */
22819 if (really_return
22820 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
22821 && !crtl->calls_eh_return)
22822 /* Delete LR from the register mask, so that LR on
22823 the stack is loaded into the PC in the register mask. */
22824 saved_regs_mask &= ~(1 << LR_REGNUM);
22825 else
22826 saved_regs_mask &= ~(1 << PC_REGNUM);
22827
22828 num_regs = bit_count (saved_regs_mask);
22829 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
22830 {
22831 /* Unwind the stack to just below the saved registers. */
22832 emit_insn (gen_addsi3 (stack_pointer_rtx,
22833 hard_frame_pointer_rtx,
22834 GEN_INT (- 4 * num_regs)));
22835 }
22836
22837 arm_emit_multi_reg_pop (saved_regs_mask);
22838
22839 if (IS_INTERRUPT (func_type))
22840 {
22841 /* Interrupt handlers will have pushed the
22842 IP onto the stack, so restore it now. */
22843 rtx insn;
22844 rtx addr = gen_rtx_MEM (SImode,
22845 gen_rtx_POST_INC (SImode,
22846 stack_pointer_rtx));
22847 set_mem_alias_set (addr, get_frame_alias_set ());
22848 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
22849 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22850 gen_rtx_REG (SImode, IP_REGNUM),
22851 NULL_RTX);
22852 }
22853
22854 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
22855 return;
22856
22857 if (crtl->calls_eh_return)
22858 emit_insn (gen_addsi3 (stack_pointer_rtx,
22859 stack_pointer_rtx,
22860 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
22861
22862 if (IS_STACKALIGN (func_type))
22863 /* Restore the original stack pointer. Before prologue, the stack was
22864 realigned and the original stack pointer saved in r0. For details,
22865 see comment in arm_expand_prologue. */
22866 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
22867
22868 emit_jump_insn (simple_return_rtx);
22869 }
22870
22871 /* Generate RTL to represent ARM epilogue. Really_return is true if the
22872 function is not a sibcall. */
22873 void
22874 arm_expand_epilogue (bool really_return)
22875 {
22876 unsigned long func_type;
22877 unsigned long saved_regs_mask;
22878 int num_regs = 0;
22879 int i;
22880 int amount;
22881 arm_stack_offsets *offsets;
22882
22883 func_type = arm_current_func_type ();
22884
22885 /* Naked functions don't have epilogues. Hence, generate the return pattern and
22886 let output_return_instruction take care of instruction emission, if any. */
22887 if (IS_NAKED (func_type)
22888 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
22889 {
22890 emit_jump_insn (simple_return_rtx);
22891 return;
22892 }
22893
22894 /* If we are throwing an exception, then we really must be doing a
22895 return, so we can't tail-call. */
22896 gcc_assert (!crtl->calls_eh_return || really_return);
22897
22898 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
22899 {
22900 arm_expand_epilogue_apcs_frame (really_return);
22901 return;
22902 }
22903
22904 /* Get frame offsets for ARM. */
22905 offsets = arm_get_frame_offsets ();
22906 saved_regs_mask = offsets->saved_regs_mask;
22907 num_regs = bit_count (saved_regs_mask);
22908
22909 if (frame_pointer_needed)
22910 {
22911 /* Restore stack pointer if necessary. */
22912 if (TARGET_ARM)
22913 {
22914 /* In ARM mode, frame pointer points to first saved register.
22915 Restore stack pointer to last saved register. */
22916 amount = offsets->frame - offsets->saved_regs;
22917
22918 /* Force out any pending memory operations that reference stacked data
22919 before stack de-allocation occurs. */
22920 emit_insn (gen_blockage ());
22921 emit_insn (gen_addsi3 (stack_pointer_rtx,
22922 hard_frame_pointer_rtx,
22923 GEN_INT (amount)));
22924
22925 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
22926 deleted. */
22927 emit_insn (gen_force_register_use (stack_pointer_rtx));
22928 }
22929 else
22930 {
22931 /* In Thumb-2 mode, the frame pointer points to the last saved
22932 register. */
22933 amount = offsets->locals_base - offsets->saved_regs;
22934 if (amount)
22935 emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22936 hard_frame_pointer_rtx,
22937 GEN_INT (amount)));
22938
22939 /* Force out any pending memory operations that reference stacked data
22940 before stack de-allocation occurs. */
22941 emit_insn (gen_blockage ());
22942 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
22943 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
22944 deleted. */
22945 emit_insn (gen_force_register_use (stack_pointer_rtx));
22946 }
22947 }
22948 else
22949 {
22950 /* Pop off outgoing args and local frame to adjust stack pointer to
22951 last saved register. */
22952 amount = offsets->outgoing_args - offsets->saved_regs;
22953 if (amount)
22954 {
22955 /* Force out any pending memory operations that reference stacked data
22956 before stack de-allocation occurs. */
22957 emit_insn (gen_blockage ());
22958 emit_insn (gen_addsi3 (stack_pointer_rtx,
22959 stack_pointer_rtx,
22960 GEN_INT (amount)));
22961 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
22962 not deleted. */
22963 emit_insn (gen_force_register_use (stack_pointer_rtx));
22964 }
22965 }
22966
22967 if (TARGET_HARD_FLOAT && TARGET_VFP)
22968 {
22969 /* Generate VFP register multi-pop. */
22970 int end_reg = LAST_VFP_REGNUM + 1;
22971
22972 /* Scan the registers in reverse order. We need to match
22973 any groupings made in the prologue and generate matching
22974 vldm operations. The need to match groups is because,
22975 unlike pop, vldm can only do consecutive regs. */
22976 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
22977 /* Look for a case where a reg does not need restoring. */
22978 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
22979 && (!df_regs_ever_live_p (i + 1)
22980 || call_used_regs[i + 1]))
22981 {
22982 /* Restore the regs discovered so far (from reg+2 to
22983 end_reg). */
22984 if (end_reg > i + 2)
22985 arm_emit_vfp_multi_reg_pop (i + 2,
22986 (end_reg - (i + 2)) / 2,
22987 stack_pointer_rtx);
22988 end_reg = i;
22989 }
22990
22991 /* Restore the remaining regs that we have discovered (or possibly
22992 even all of them, if the conditional in the for loop never
22993 fired). */
22994 if (end_reg > i + 2)
22995 arm_emit_vfp_multi_reg_pop (i + 2,
22996 (end_reg - (i + 2)) / 2,
22997 stack_pointer_rtx);
22998 }
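
/* For example (illustrative only), if the prologue saved d8-d9 as one group
and d11 as another, the loop above notices the dead pair at d10 and emits
two separate VLDM pops, one per contiguous group, matching the store
groupings created in the prologue. */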
22999
23000 if (TARGET_IWMMXT)
23001 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
23002 if (df_regs_ever_live_p (i) && !call_used_regs[i])
23003 {
23004 rtx insn;
23005 rtx addr = gen_rtx_MEM (V2SImode,
23006 gen_rtx_POST_INC (SImode,
23007 stack_pointer_rtx));
23008 set_mem_alias_set (addr, get_frame_alias_set ());
23009 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
23010 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
23011 gen_rtx_REG (V2SImode, i),
23012 NULL_RTX);
23013 }
23014
23015 if (saved_regs_mask)
23016 {
23017 rtx insn;
23018 bool return_in_pc = false;
23019
23020 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
23021 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
23022 && !IS_STACKALIGN (func_type)
23023 && really_return
23024 && crtl->args.pretend_args_size == 0
23025 && saved_regs_mask & (1 << LR_REGNUM)
23026 && !crtl->calls_eh_return)
23027 {
23028 saved_regs_mask &= ~(1 << LR_REGNUM);
23029 saved_regs_mask |= (1 << PC_REGNUM);
23030 return_in_pc = true;
23031 }
23032
23033 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
23034 {
23035 for (i = 0; i <= LAST_ARM_REGNUM; i++)
23036 if (saved_regs_mask & (1 << i))
23037 {
23038 rtx addr = gen_rtx_MEM (SImode,
23039 gen_rtx_POST_INC (SImode,
23040 stack_pointer_rtx));
23041 set_mem_alias_set (addr, get_frame_alias_set ());
23042
23043 if (i == PC_REGNUM)
23044 {
23045 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
23046 XVECEXP (insn, 0, 0) = ret_rtx;
23047 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
23048 gen_rtx_REG (SImode, i),
23049 addr);
23050 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
23051 insn = emit_jump_insn (insn);
23052 }
23053 else
23054 {
23055 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
23056 addr));
23057 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
23058 gen_rtx_REG (SImode, i),
23059 NULL_RTX);
23060 }
23061 }
23062 }
23063 else
23064 {
23065 arm_emit_multi_reg_pop (saved_regs_mask);
23066 }
23067
23068 if (return_in_pc == true)
23069 return;
23070 }
23071
23072 if (crtl->args.pretend_args_size)
23073 emit_insn (gen_addsi3 (stack_pointer_rtx,
23074 stack_pointer_rtx,
23075 GEN_INT (crtl->args.pretend_args_size)));
23076
23077 if (!really_return)
23078 return;
23079
23080 if (crtl->calls_eh_return)
23081 emit_insn (gen_addsi3 (stack_pointer_rtx,
23082 stack_pointer_rtx,
23083 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
23084
23085 if (IS_STACKALIGN (func_type))
23086 /* Restore the original stack pointer. Before prologue, the stack was
23087 realigned and the original stack pointer saved in r0. For details,
23088 see comment in arm_expand_prologue. */
23089 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
23090
23091 emit_jump_insn (simple_return_rtx);
23092 }
23093
23094 /* Implementation of insn prologue_thumb1_interwork. This is the first
23095 "instruction" of a function called in ARM mode. Swap to thumb mode. */
23096
23097 const char *
23098 thumb1_output_interwork (void)
23099 {
23100 const char * name;
23101 FILE *f = asm_out_file;
23102
23103 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
23104 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
23105 == SYMBOL_REF);
23106 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
23107
23108 /* Generate code sequence to switch us into Thumb mode. */
23109 /* The .code 32 directive has already been emitted by
23110 ASM_DECLARE_FUNCTION_NAME. */
23111 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
23112 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
23113
23114 /* Generate a label, so that the debugger will notice the
23115 change in instruction sets. This label is also used by
23116 the assembler to bypass the ARM code when this function
23117 is called from a Thumb encoded function elsewhere in the
23118 same file. Hence the definition of STUB_NAME here must
23119 agree with the definition in gas/config/tc-arm.c. */
23120
23121 #define STUB_NAME ".real_start_of"
23122
23123 fprintf (f, "\t.code\t16\n");
23124 #ifdef ARM_PE
23125 if (arm_dllexport_name_p (name))
23126 name = arm_strip_name_encoding (name);
23127 #endif
23128 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
23129 fprintf (f, "\t.thumb_func\n");
23130 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
23131
23132 return "";
23133 }
23134
23135 /* Handle the case of a double word load into a low register from
23136 a computed memory address. The computed address may involve a
23137 register which is overwritten by the load. */
23138 const char *
23139 thumb_load_double_from_address (rtx *operands)
23140 {
23141 rtx addr;
23142 rtx base;
23143 rtx offset;
23144 rtx arg1;
23145 rtx arg2;
23146
23147 gcc_assert (REG_P (operands[0]));
23148 gcc_assert (MEM_P (operands[1]));
23149
23150 /* Get the memory address. */
23151 addr = XEXP (operands[1], 0);
23152
23153 /* Work out how the memory address is computed. */
23154 switch (GET_CODE (addr))
23155 {
23156 case REG:
23157 operands[2] = adjust_address (operands[1], SImode, 4);
23158
23159 if (REGNO (operands[0]) == REGNO (addr))
23160 {
23161 output_asm_insn ("ldr\t%H0, %2", operands);
23162 output_asm_insn ("ldr\t%0, %1", operands);
23163 }
23164 else
23165 {
23166 output_asm_insn ("ldr\t%0, %1", operands);
23167 output_asm_insn ("ldr\t%H0, %2", operands);
23168 }
23169 break;
23170
23171 case CONST:
23172 /* Compute <address> + 4 for the high order load. */
23173 operands[2] = adjust_address (operands[1], SImode, 4);
23174
23175 output_asm_insn ("ldr\t%0, %1", operands);
23176 output_asm_insn ("ldr\t%H0, %2", operands);
23177 break;
23178
23179 case PLUS:
23180 arg1 = XEXP (addr, 0);
23181 arg2 = XEXP (addr, 1);
23182
23183 if (CONSTANT_P (arg1))
23184 base = arg2, offset = arg1;
23185 else
23186 base = arg1, offset = arg2;
23187
23188 gcc_assert (REG_P (base));
23189
23190 /* Catch the case of <address> = <reg> + <reg> */
23191 if (REG_P (offset))
23192 {
23193 int reg_offset = REGNO (offset);
23194 int reg_base = REGNO (base);
23195 int reg_dest = REGNO (operands[0]);
23196
23197 /* Add the base and offset registers together into the
23198 higher destination register. */
23199 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
23200 reg_dest + 1, reg_base, reg_offset);
23201
23202 /* Load the lower destination register from the address in
23203 the higher destination register. */
23204 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
23205 reg_dest, reg_dest + 1);
23206
23207 /* Load the higher destination register from its own address
23208 plus 4. */
23209 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
23210 reg_dest + 1, reg_dest + 1);
23211 }
23212 else
23213 {
23214 /* Compute <address> + 4 for the high order load. */
23215 operands[2] = adjust_address (operands[1], SImode, 4);
23216
23217 /* If the computed address is held in the low order register
23218 then load the high order register first, otherwise always
23219 load the low order register first. */
23220 if (REGNO (operands[0]) == REGNO (base))
23221 {
23222 output_asm_insn ("ldr\t%H0, %2", operands);
23223 output_asm_insn ("ldr\t%0, %1", operands);
23224 }
23225 else
23226 {
23227 output_asm_insn ("ldr\t%0, %1", operands);
23228 output_asm_insn ("ldr\t%H0, %2", operands);
23229 }
23230 }
23231 break;
23232
23233 case LABEL_REF:
23234 /* With no registers to worry about we can just load the value
23235 directly. */
23236 operands[2] = adjust_address (operands[1], SImode, 4);
23237
23238 output_asm_insn ("ldr\t%H0, %2", operands);
23239 output_asm_insn ("ldr\t%0, %1", operands);
23240 break;
23241
23242 default:
23243 gcc_unreachable ();
23244 }
23245
23246 return "";
23247 }
23248
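/* Output a Thumb ldmia/stmia pair that copies N (2 or 3) words from the
   address in operands[1] to the address in operands[0], using the scratch
   registers in operands[4] onwards.  The scratch operands are first sorted
   into ascending order, apparently to keep the register lists in the
   canonical ascending form expected by the assembler.  (Descriptive
   comment added; behaviour inferred from the code below.)  */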
23249 const char *
23250 thumb_output_move_mem_multiple (int n, rtx *operands)
23251 {
23252 rtx tmp;
23253
23254 switch (n)
23255 {
23256 case 2:
23257 if (REGNO (operands[4]) > REGNO (operands[5]))
23258 {
23259 tmp = operands[4];
23260 operands[4] = operands[5];
23261 operands[5] = tmp;
23262 }
23263 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
23264 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
23265 break;
23266
23267 case 3:
23268 if (REGNO (operands[4]) > REGNO (operands[5]))
23269 {
23270 tmp = operands[4];
23271 operands[4] = operands[5];
23272 operands[5] = tmp;
23273 }
23274 if (REGNO (operands[5]) > REGNO (operands[6]))
23275 {
23276 tmp = operands[5];
23277 operands[5] = operands[6];
23278 operands[6] = tmp;
23279 }
23280 if (REGNO (operands[4]) > REGNO (operands[5]))
23281 {
23282 tmp = operands[4];
23283 operands[4] = operands[5];
23284 operands[5] = tmp;
23285 }
23286
23287 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
23288 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
23289 break;
23290
23291 default:
23292 gcc_unreachable ();
23293 }
23294
23295 return "";
23296 }
23297
23298 /* Output a call-via instruction for thumb state. */
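/* Rough sketch of the effect: a call through, say, r3 becomes
       bl  <label>                 @ label generated below
   and the matching trampoline
       <label>:  bx  r3
   is emitted later (once per text section, or per function section) by
   arm_file_end or the per-function path, so register-indirect calls work
   on Thumb cores where a direct blx through a register is unavailable.
   (Label and register names here are illustrative.)  */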
23299 const char *
23300 thumb_call_via_reg (rtx reg)
23301 {
23302 int regno = REGNO (reg);
23303 rtx *labelp;
23304
23305 gcc_assert (regno < LR_REGNUM);
23306
23307 /* If we are in the normal text section we can use a single instance
23308 per compilation unit. If we are doing function sections, then we need
23309 an entry per section, since we can't rely on reachability. */
23310 if (in_section == text_section)
23311 {
23312 thumb_call_reg_needed = 1;
23313
23314 if (thumb_call_via_label[regno] == NULL)
23315 thumb_call_via_label[regno] = gen_label_rtx ();
23316 labelp = thumb_call_via_label + regno;
23317 }
23318 else
23319 {
23320 if (cfun->machine->call_via[regno] == NULL)
23321 cfun->machine->call_via[regno] = gen_label_rtx ();
23322 labelp = cfun->machine->call_via + regno;
23323 }
23324
23325 output_asm_insn ("bl\t%a0", labelp);
23326 return "";
23327 }
23328
23329 /* Routines for generating rtl. */
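/* Expand a small memory copy of INTVAL (operands[2]) bytes from
   operands[1] to operands[0]: 12- and 8-byte chunks go through the
   movmem12b/movmem8b patterns (which post-increment both pointers), and
   any remaining 4/2/1 byte tail is copied through fresh SImode, HImode
   and QImode pseudos at increasing offsets.  (Descriptive comment added;
   derived from the code below.)  */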
23330 void
23331 thumb_expand_movmemqi (rtx *operands)
23332 {
23333 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
23334 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
23335 HOST_WIDE_INT len = INTVAL (operands[2]);
23336 HOST_WIDE_INT offset = 0;
23337
23338 while (len >= 12)
23339 {
23340 emit_insn (gen_movmem12b (out, in, out, in));
23341 len -= 12;
23342 }
23343
23344 if (len >= 8)
23345 {
23346 emit_insn (gen_movmem8b (out, in, out, in));
23347 len -= 8;
23348 }
23349
23350 if (len >= 4)
23351 {
23352 rtx reg = gen_reg_rtx (SImode);
23353 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
23354 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
23355 len -= 4;
23356 offset += 4;
23357 }
23358
23359 if (len >= 2)
23360 {
23361 rtx reg = gen_reg_rtx (HImode);
23362 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
23363 plus_constant (Pmode, in,
23364 offset))));
23365 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
23366 offset)),
23367 reg));
23368 len -= 2;
23369 offset += 2;
23370 }
23371
23372 if (len)
23373 {
23374 rtx reg = gen_reg_rtx (QImode);
23375 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
23376 plus_constant (Pmode, in,
23377 offset))));
23378 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
23379 offset)),
23380 reg));
23381 }
23382 }
23383
23384 void
23385 thumb_reload_out_hi (rtx *operands)
23386 {
23387 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
23388 }
23389
23390 /* Handle reading a half-word from memory during reload. */
23391 void
23392 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
23393 {
23394 gcc_unreachable ();
23395 }
23396
23397 /* Return the length of a function name prefix
23398 that starts with the character 'c'. */
23399 static int
23400 arm_get_strip_length (int c)
23401 {
23402 switch (c)
23403 {
23404 ARM_NAME_ENCODING_LENGTHS
23405 default: return 0;
23406 }
23407 }
23408
23409 /* Return a pointer to a function's name with any
23410 and all prefix encodings stripped from it. */
23411 const char *
23412 arm_strip_name_encoding (const char *name)
23413 {
23414 int skip;
23415
23416 while ((skip = arm_get_strip_length (* name)))
23417 name += skip;
23418
23419 return name;
23420 }
23421
23422 /* If there is a '*' anywhere in the name's prefix, then
23423 emit the stripped name verbatim, otherwise prepend an
23424 underscore if leading underscores are being used. */
23425 void
23426 arm_asm_output_labelref (FILE *stream, const char *name)
23427 {
23428 int skip;
23429 int verbatim = 0;
23430
23431 while ((skip = arm_get_strip_length (* name)))
23432 {
23433 verbatim |= (*name == '*');
23434 name += skip;
23435 }
23436
23437 if (verbatim)
23438 fputs (name, stream);
23439 else
23440 asm_fprintf (stream, "%U%s", name);
23441 }
23442
23443 /* This function is used to emit an EABI tag and its associated value.
23444 We emit the numerical value of the tag in case the assembler does not
23445 support textual tags (e.g. gas prior to 2.20). If requested we include
23446 the tag name in a comment so that anyone reading the assembler output
23447 will know which tag is being set.
23448
23449 This function is not static because arm-c.c needs it too. */
23450
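/* For instance, arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 2)
   produces, with -fverbose-asm and assuming '@' as the comment character:
       .eabi_attribute 26, 2   @ Tag_ABI_enum_size
   and just ".eabi_attribute 26, 2" otherwise.  */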
23451 void
23452 arm_emit_eabi_attribute (const char *name, int num, int val)
23453 {
23454 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
23455 if (flag_verbose_asm || flag_debug_asm)
23456 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
23457 asm_fprintf (asm_out_file, "\n");
23458 }
23459
23460 static void
23461 arm_file_start (void)
23462 {
23463 int val;
23464
23465 if (TARGET_UNIFIED_ASM)
23466 asm_fprintf (asm_out_file, "\t.syntax unified\n");
23467
23468 if (TARGET_BPABI)
23469 {
23470 const char *fpu_name;
23471 if (arm_selected_arch)
23472 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
23473 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
23474 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
23475 else
23476 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
23477
23478 if (TARGET_SOFT_FLOAT)
23479 {
23480 fpu_name = "softvfp";
23481 }
23482 else
23483 {
23484 fpu_name = arm_fpu_desc->name;
23485 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
23486 {
23487 if (TARGET_HARD_FLOAT)
23488 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
23489 if (TARGET_HARD_FLOAT_ABI)
23490 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
23491 }
23492 }
23493 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
23494
23495 /* Some of these attributes only apply when the corresponding features
23496 are used. However we don't have any easy way of figuring this out.
23497 Conservatively record the setting that would have been used. */
23498
23499 if (flag_rounding_math)
23500 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
23501
23502 if (!flag_unsafe_math_optimizations)
23503 {
23504 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
23505 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
23506 }
23507 if (flag_signaling_nans)
23508 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
23509
23510 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
23511 flag_finite_math_only ? 1 : 3);
23512
23513 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
23514 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
23515 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
23516 flag_short_enums ? 1 : 2);
23517
23518 /* Tag_ABI_optimization_goals. */
23519 if (optimize_size)
23520 val = 4;
23521 else if (optimize >= 2)
23522 val = 2;
23523 else if (optimize)
23524 val = 1;
23525 else
23526 val = 6;
23527 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
23528
23529 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
23530 unaligned_access);
23531
23532 if (arm_fp16_format)
23533 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
23534 (int) arm_fp16_format);
23535
23536 if (arm_lang_output_object_attributes_hook)
23537 arm_lang_output_object_attributes_hook();
23538 }
23539
23540 default_file_start ();
23541 }
23542
23543 static void
23544 arm_file_end (void)
23545 {
23546 int regno;
23547
23548 if (NEED_INDICATE_EXEC_STACK)
23549 /* Add .note.GNU-stack. */
23550 file_end_indicate_exec_stack ();
23551
23552 if (! thumb_call_reg_needed)
23553 return;
23554
23555 switch_to_section (text_section);
23556 asm_fprintf (asm_out_file, "\t.code 16\n");
23557 ASM_OUTPUT_ALIGN (asm_out_file, 1);
23558
23559 for (regno = 0; regno < LR_REGNUM; regno++)
23560 {
23561 rtx label = thumb_call_via_label[regno];
23562
23563 if (label != 0)
23564 {
23565 targetm.asm_out.internal_label (asm_out_file, "L",
23566 CODE_LABEL_NUMBER (label));
23567 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
23568 }
23569 }
23570 }
23571
23572 #ifndef ARM_PE
23573 /* Symbols in the text segment can be accessed without indirecting via the
23574 constant pool; it may take an extra binary operation, but this is still
23575 faster than indirecting via memory. Don't do this when not optimizing,
23576 since we won't be calculating al of the offsets necessary to do this
23577 simplification. */
23578
23579 static void
23580 arm_encode_section_info (tree decl, rtx rtl, int first)
23581 {
23582 if (optimize > 0 && TREE_CONSTANT (decl))
23583 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
23584
23585 default_encode_section_info (decl, rtl, first);
23586 }
23587 #endif /* !ARM_PE */
23588
23589 static void
23590 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
23591 {
23592 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
23593 && !strcmp (prefix, "L"))
23594 {
23595 arm_ccfsm_state = 0;
23596 arm_target_insn = NULL;
23597 }
23598 default_internal_label (stream, prefix, labelno);
23599 }
23600
23601 /* Output code to add DELTA to the first argument, and then jump
23602 to FUNCTION. Used for C++ multiple inheritance. */
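/* As a rough sketch, in ARM or Thumb-2 state a thunk with DELTA == 4
   comes out as
       add r0, r0, #4
       b   <function>(PLT)         @ (PLT) only when NEED_PLT_RELOC
   while Thumb-1 loads the target (and any large delta) from a literal
   word and branches via r12/r3.  Register choice is illustrative and
   depends on this_regno.  */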
23603 static void
23604 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
23605 HOST_WIDE_INT delta,
23606 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
23607 tree function)
23608 {
23609 static int thunk_label = 0;
23610 char label[256];
23611 char labelpc[256];
23612 int mi_delta = delta;
23613 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
23614 int shift = 0;
23615 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
23616 ? 1 : 0);
23617 if (mi_delta < 0)
23618 mi_delta = - mi_delta;
23619
23620 if (TARGET_THUMB1)
23621 {
23622 int labelno = thunk_label++;
23623 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
23624 /* Thunks are entered in ARM mode when available. */
23625 if (TARGET_THUMB1_ONLY)
23626 {
23627 /* push r3 so we can use it as a temporary. */
23628 /* TODO: Omit this save if r3 is not used. */
23629 fputs ("\tpush {r3}\n", file);
23630 fputs ("\tldr\tr3, ", file);
23631 }
23632 else
23633 {
23634 fputs ("\tldr\tr12, ", file);
23635 }
23636 assemble_name (file, label);
23637 fputc ('\n', file);
23638 if (flag_pic)
23639 {
23640 /* If we are generating PIC, the ldr instruction below loads
23641 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
23642 the address of the add + 8, so we have:
23643
23644 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
23645 = target + 1.
23646
23647 Note that we have "+ 1" because some versions of GNU ld
23648 don't set the low bit of the result for R_ARM_REL32
23649 relocations against thumb function symbols.
23650 On ARMv6M this is +4, not +8. */
23651 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
23652 assemble_name (file, labelpc);
23653 fputs (":\n", file);
23654 if (TARGET_THUMB1_ONLY)
23655 {
23656 /* This is 2 insns after the start of the thunk, so we know it
23657 is 4-byte aligned. */
23658 fputs ("\tadd\tr3, pc, r3\n", file);
23659 fputs ("\tmov r12, r3\n", file);
23660 }
23661 else
23662 fputs ("\tadd\tr12, pc, r12\n", file);
23663 }
23664 else if (TARGET_THUMB1_ONLY)
23665 fputs ("\tmov r12, r3\n", file);
23666 }
23667 if (TARGET_THUMB1_ONLY)
23668 {
23669 if (mi_delta > 255)
23670 {
23671 fputs ("\tldr\tr3, ", file);
23672 assemble_name (file, label);
23673 fputs ("+4\n", file);
23674 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
23675 mi_op, this_regno, this_regno);
23676 }
23677 else if (mi_delta != 0)
23678 {
23679 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23680 mi_op, this_regno, this_regno,
23681 mi_delta);
23682 }
23683 }
23684 else
23685 {
23686 /* TODO: Use movw/movt for large constants when available. */
23687 while (mi_delta != 0)
23688 {
23689 if ((mi_delta & (3 << shift)) == 0)
23690 shift += 2;
23691 else
23692 {
23693 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23694 mi_op, this_regno, this_regno,
23695 mi_delta & (0xff << shift));
23696 mi_delta &= ~(0xff << shift);
23697 shift += 8;
23698 }
23699 }
23700 }
23701 if (TARGET_THUMB1)
23702 {
23703 if (TARGET_THUMB1_ONLY)
23704 fputs ("\tpop\t{r3}\n", file);
23705
23706 fprintf (file, "\tbx\tr12\n");
23707 ASM_OUTPUT_ALIGN (file, 2);
23708 assemble_name (file, label);
23709 fputs (":\n", file);
23710 if (flag_pic)
23711 {
23712 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
23713 rtx tem = XEXP (DECL_RTL (function), 0);
23714 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
23715 tem = gen_rtx_MINUS (GET_MODE (tem),
23716 tem,
23717 gen_rtx_SYMBOL_REF (Pmode,
23718 ggc_strdup (labelpc)));
23719 assemble_integer (tem, 4, BITS_PER_WORD, 1);
23720 }
23721 else
23722 /* Output ".word .LTHUNKn". */
23723 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
23724
23725 if (TARGET_THUMB1_ONLY && mi_delta > 255)
23726 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
23727 }
23728 else
23729 {
23730 fputs ("\tb\t", file);
23731 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
23732 if (NEED_PLT_RELOC)
23733 fputs ("(PLT)", file);
23734 fputc ('\n', file);
23735 }
23736 }
23737
23738 int
23739 arm_emit_vector_const (FILE *file, rtx x)
23740 {
23741 int i;
23742 const char * pattern;
23743
23744 gcc_assert (GET_CODE (x) == CONST_VECTOR);
23745
23746 switch (GET_MODE (x))
23747 {
23748 case V2SImode: pattern = "%08x"; break;
23749 case V4HImode: pattern = "%04x"; break;
23750 case V8QImode: pattern = "%02x"; break;
23751 default: gcc_unreachable ();
23752 }
23753
23754 fprintf (file, "0x");
23755 for (i = CONST_VECTOR_NUNITS (x); i--;)
23756 {
23757 rtx element;
23758
23759 element = CONST_VECTOR_ELT (x, i);
23760 fprintf (file, pattern, INTVAL (element));
23761 }
23762
23763 return 1;
23764 }
23765
23766 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
23767 HFmode constant pool entries are actually loaded with ldr. */
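/* Layout sketch (the exact directive is whatever assemble_integer picks,
   typically .short): on a little-endian target the two value bytes come
   first, followed by two bytes of zero padding, and the order is reversed
   for WORDS_BIG_ENDIAN, so an ldr of the 4-byte word finds the half-float
   in the expected half.  */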
23768 void
23769 arm_emit_fp16_const (rtx c)
23770 {
23771 REAL_VALUE_TYPE r;
23772 long bits;
23773
23774 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
23775 bits = real_to_target (NULL, &r, HFmode);
23776 if (WORDS_BIG_ENDIAN)
23777 assemble_zeros (2);
23778 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
23779 if (!WORDS_BIG_ENDIAN)
23780 assemble_zeros (2);
23781 }
23782
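/* Output an iWMMXt general-register load.  If the address is not a
   reg-plus-constant, or the constant offset is within the wldrw range
   (less than 1024 in magnitude), a plain wldrw is used; otherwise the
   value is loaded through a core register that is spilled around the
   sequence, roughly:
       str   rN, [sp, #-4]!     @ Start of GR load expansion
       ldr   rN, <mem>
       tmcr  wcgrM, rN
       ldr   rN, [sp], #4       @ End of GR load expansion
   (Comment added for clarity; rN/wcgrM are illustrative.)  */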
23783 const char *
23784 arm_output_load_gr (rtx *operands)
23785 {
23786 rtx reg;
23787 rtx offset;
23788 rtx wcgr;
23789 rtx sum;
23790
23791 if (!MEM_P (operands [1])
23792 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
23793 || !REG_P (reg = XEXP (sum, 0))
23794 || !CONST_INT_P (offset = XEXP (sum, 1))
23795 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
23796 return "wldrw%?\t%0, %1";
23797
23798 /* Fix up an out-of-range load of a GR register. */
23799 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
23800 wcgr = operands[0];
23801 operands[0] = reg;
23802 output_asm_insn ("ldr%?\t%0, %1", operands);
23803
23804 operands[0] = wcgr;
23805 operands[1] = reg;
23806 output_asm_insn ("tmcr%?\t%0, %1", operands);
23807 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
23808
23809 return "";
23810 }
23811
23812 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
23813
23814 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
23815 named arg and all anonymous args onto the stack.
23816 XXX I know the prologue shouldn't be pushing registers, but it is faster
23817 that way. */
23818
23819 static void
23820 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
23821 enum machine_mode mode,
23822 tree type,
23823 int *pretend_size,
23824 int second_time ATTRIBUTE_UNUSED)
23825 {
23826 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
23827 int nregs;
23828
23829 cfun->machine->uses_anonymous_args = 1;
23830 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
23831 {
23832 nregs = pcum->aapcs_ncrn;
23833 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
23834 nregs++;
23835 }
23836 else
23837 nregs = pcum->nregs;
23838
23839 if (nregs < NUM_ARG_REGS)
23840 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
23841 }
23842
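/* The next several functions are dependency tests intended for use as
   bypass and cost-adjustment guards in the pipeline descriptions: each
   one peels any COND_EXEC/PARALLEL wrapper off the producer and consumer
   patterns and then checks whether the produced register feeds the
   address, shift amount, multiplier or accumulator operand of the
   consumer.  (Summary comment added; see the individual functions.)  */
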
23843 /* Return nonzero if the CONSUMER instruction (a store) does not need
23844 PRODUCER's value to calculate the address. */
23845
23846 int
23847 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
23848 {
23849 rtx value = PATTERN (producer);
23850 rtx addr = PATTERN (consumer);
23851
23852 if (GET_CODE (value) == COND_EXEC)
23853 value = COND_EXEC_CODE (value);
23854 if (GET_CODE (value) == PARALLEL)
23855 value = XVECEXP (value, 0, 0);
23856 value = XEXP (value, 0);
23857 if (GET_CODE (addr) == COND_EXEC)
23858 addr = COND_EXEC_CODE (addr);
23859 if (GET_CODE (addr) == PARALLEL)
23860 addr = XVECEXP (addr, 0, 0);
23861 addr = XEXP (addr, 0);
23862
23863 return !reg_overlap_mentioned_p (value, addr);
23864 }
23865
23866 /* Return nonzero if the CONSUMER instruction (a store) does need
23867 PRODUCER's value to calculate the address. */
23868
23869 int
23870 arm_early_store_addr_dep (rtx producer, rtx consumer)
23871 {
23872 return !arm_no_early_store_addr_dep (producer, consumer);
23873 }
23874
23875 /* Return nonzero if the CONSUMER instruction (a load) does need
23876 PRODUCER's value to calculate the address. */
23877
23878 int
23879 arm_early_load_addr_dep (rtx producer, rtx consumer)
23880 {
23881 rtx value = PATTERN (producer);
23882 rtx addr = PATTERN (consumer);
23883
23884 if (GET_CODE (value) == COND_EXEC)
23885 value = COND_EXEC_CODE (value);
23886 if (GET_CODE (value) == PARALLEL)
23887 value = XVECEXP (value, 0, 0);
23888 value = XEXP (value, 0);
23889 if (GET_CODE (addr) == COND_EXEC)
23890 addr = COND_EXEC_CODE (addr);
23891 if (GET_CODE (addr) == PARALLEL)
23892 {
23893 if (GET_CODE (XVECEXP (addr, 0, 0)) == RETURN)
23894 addr = XVECEXP (addr, 0, 1);
23895 else
23896 addr = XVECEXP (addr, 0, 0);
23897 }
23898 addr = XEXP (addr, 1);
23899
23900 return reg_overlap_mentioned_p (value, addr);
23901 }
23902
23903 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23904 have an early register shift value or amount dependency on the
23905 result of PRODUCER. */
23906
23907 int
23908 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
23909 {
23910 rtx value = PATTERN (producer);
23911 rtx op = PATTERN (consumer);
23912 rtx early_op;
23913
23914 if (GET_CODE (value) == COND_EXEC)
23915 value = COND_EXEC_CODE (value);
23916 if (GET_CODE (value) == PARALLEL)
23917 value = XVECEXP (value, 0, 0);
23918 value = XEXP (value, 0);
23919 if (GET_CODE (op) == COND_EXEC)
23920 op = COND_EXEC_CODE (op);
23921 if (GET_CODE (op) == PARALLEL)
23922 op = XVECEXP (op, 0, 0);
23923 op = XEXP (op, 1);
23924
23925 early_op = XEXP (op, 0);
23926 /* This is either an actual independent shift, or a shift applied to
23927 the first operand of another operation. We want the whole shift
23928 operation. */
23929 if (REG_P (early_op))
23930 early_op = op;
23931
23932 return !reg_overlap_mentioned_p (value, early_op);
23933 }
23934
23935 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23936 have an early register shift value dependency on the result of
23937 PRODUCER. */
23938
23939 int
23940 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
23941 {
23942 rtx value = PATTERN (producer);
23943 rtx op = PATTERN (consumer);
23944 rtx early_op;
23945
23946 if (GET_CODE (value) == COND_EXEC)
23947 value = COND_EXEC_CODE (value);
23948 if (GET_CODE (value) == PARALLEL)
23949 value = XVECEXP (value, 0, 0);
23950 value = XEXP (value, 0);
23951 if (GET_CODE (op) == COND_EXEC)
23952 op = COND_EXEC_CODE (op);
23953 if (GET_CODE (op) == PARALLEL)
23954 op = XVECEXP (op, 0, 0);
23955 op = XEXP (op, 1);
23956
23957 early_op = XEXP (op, 0);
23958
23959 /* This is either an actual independent shift, or a shift applied to
23960 the first operand of another operation. We want the value being
23961 shifted, in either case. */
23962 if (!REG_P (early_op))
23963 early_op = XEXP (early_op, 0);
23964
23965 return !reg_overlap_mentioned_p (value, early_op);
23966 }
23967
23968 /* Return nonzero if the CONSUMER (a mul or mac op) does not
23969 have an early register mult dependency on the result of
23970 PRODUCER. */
23971
23972 int
23973 arm_no_early_mul_dep (rtx producer, rtx consumer)
23974 {
23975 rtx value = PATTERN (producer);
23976 rtx op = PATTERN (consumer);
23977
23978 if (GET_CODE (value) == COND_EXEC)
23979 value = COND_EXEC_CODE (value);
23980 if (GET_CODE (value) == PARALLEL)
23981 value = XVECEXP (value, 0, 0);
23982 value = XEXP (value, 0);
23983 if (GET_CODE (op) == COND_EXEC)
23984 op = COND_EXEC_CODE (op);
23985 if (GET_CODE (op) == PARALLEL)
23986 op = XVECEXP (op, 0, 0);
23987 op = XEXP (op, 1);
23988
23989 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
23990 {
23991 if (GET_CODE (XEXP (op, 0)) == MULT)
23992 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
23993 else
23994 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
23995 }
23996
23997 return 0;
23998 }
23999
24000 /* We can't rely on the caller doing the proper promotion when
24001 using APCS or ATPCS. */
24002
24003 static bool
24004 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
24005 {
24006 return !TARGET_AAPCS_BASED;
24007 }
24008
24009 static enum machine_mode
24010 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
24011 enum machine_mode mode,
24012 int *punsignedp ATTRIBUTE_UNUSED,
24013 const_tree fntype ATTRIBUTE_UNUSED,
24014 int for_return ATTRIBUTE_UNUSED)
24015 {
24016 if (GET_MODE_CLASS (mode) == MODE_INT
24017 && GET_MODE_SIZE (mode) < 4)
24018 return SImode;
24019
24020 return mode;
24021 }
24022
24023 /* AAPCS based ABIs use short enums by default. */
24024
24025 static bool
24026 arm_default_short_enums (void)
24027 {
24028 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
24029 }
24030
24031
24032 /* AAPCS requires that anonymous bitfields affect structure alignment. */
24033
24034 static bool
24035 arm_align_anon_bitfield (void)
24036 {
24037 return TARGET_AAPCS_BASED;
24038 }
24039
24040
24041 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
24042
24043 static tree
24044 arm_cxx_guard_type (void)
24045 {
24046 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
24047 }
24048
24049 /* Return non-zero if the consumer (a multiply-accumulate instruction)
24050 has an accumulator dependency on the result of the producer (a
24051 multiplication instruction) and no other dependency on that result. */
24052 int
24053 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
24054 {
24055 rtx mul = PATTERN (producer);
24056 rtx mac = PATTERN (consumer);
24057 rtx mul_result;
24058 rtx mac_op0, mac_op1, mac_acc;
24059
24060 if (GET_CODE (mul) == COND_EXEC)
24061 mul = COND_EXEC_CODE (mul);
24062 if (GET_CODE (mac) == COND_EXEC)
24063 mac = COND_EXEC_CODE (mac);
24064
24065 /* Check that mul is of the form (set (...) (mult ...))
24066 and mla is of the form (set (...) (plus (mult ...) (...))). */
24067 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
24068 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
24069 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
24070 return 0;
24071
24072 mul_result = XEXP (mul, 0);
24073 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
24074 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
24075 mac_acc = XEXP (XEXP (mac, 1), 1);
24076
24077 return (reg_overlap_mentioned_p (mul_result, mac_acc)
24078 && !reg_overlap_mentioned_p (mul_result, mac_op0)
24079 && !reg_overlap_mentioned_p (mul_result, mac_op1));
24080 }
24081
24082
24083 /* The EABI says test the least significant bit of a guard variable. */
24084
24085 static bool
24086 arm_cxx_guard_mask_bit (void)
24087 {
24088 return TARGET_AAPCS_BASED;
24089 }
24090
24091
24092 /* The EABI specifies that all array cookies are 8 bytes long. */
24093
24094 static tree
24095 arm_get_cookie_size (tree type)
24096 {
24097 tree size;
24098
24099 if (!TARGET_AAPCS_BASED)
24100 return default_cxx_get_cookie_size (type);
24101
24102 size = build_int_cst (sizetype, 8);
24103 return size;
24104 }
24105
24106
24107 /* The EABI says that array cookies should also contain the element size. */
24108
24109 static bool
24110 arm_cookie_has_size (void)
24111 {
24112 return TARGET_AAPCS_BASED;
24113 }
24114
24115
24116 /* The EABI says constructors and destructors should return a pointer to
24117 the object constructed/destroyed. */
24118
24119 static bool
24120 arm_cxx_cdtor_returns_this (void)
24121 {
24122 return TARGET_AAPCS_BASED;
24123 }
24124
24125 /* The EABI says that an inline function may never be the key
24126 method. */
24127
24128 static bool
24129 arm_cxx_key_method_may_be_inline (void)
24130 {
24131 return !TARGET_AAPCS_BASED;
24132 }
24133
24134 static void
24135 arm_cxx_determine_class_data_visibility (tree decl)
24136 {
24137 if (!TARGET_AAPCS_BASED
24138 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
24139 return;
24140
24141 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
24142 is exported. However, on systems without dynamic vague linkage,
24143 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
24144 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
24145 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
24146 else
24147 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
24148 DECL_VISIBILITY_SPECIFIED (decl) = 1;
24149 }
24150
24151 static bool
24152 arm_cxx_class_data_always_comdat (void)
24153 {
24154 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
24155 vague linkage if the class has no key function. */
24156 return !TARGET_AAPCS_BASED;
24157 }
24158
24159
24160 /* The EABI says __aeabi_atexit should be used to register static
24161 destructors. */
24162
24163 static bool
24164 arm_cxx_use_aeabi_atexit (void)
24165 {
24166 return TARGET_AAPCS_BASED;
24167 }
24168
24169
24170 void
24171 arm_set_return_address (rtx source, rtx scratch)
24172 {
24173 arm_stack_offsets *offsets;
24174 HOST_WIDE_INT delta;
24175 rtx addr;
24176 unsigned long saved_regs;
24177
24178 offsets = arm_get_frame_offsets ();
24179 saved_regs = offsets->saved_regs_mask;
24180
24181 if ((saved_regs & (1 << LR_REGNUM)) == 0)
24182 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
24183 else
24184 {
24185 if (frame_pointer_needed)
24186 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
24187 else
24188 {
24189 /* LR will be the first saved register. */
24190 delta = offsets->outgoing_args - (offsets->frame + 4);
24191
24192
24193 if (delta >= 4096)
24194 {
24195 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
24196 GEN_INT (delta & ~4095)));
24197 addr = scratch;
24198 delta &= 4095;
24199 }
24200 else
24201 addr = stack_pointer_rtx;
24202
24203 addr = plus_constant (Pmode, addr, delta);
24204 }
24205 emit_move_insn (gen_frame_mem (Pmode, addr), source);
24206 }
24207 }
24208
24209
24210 void
24211 thumb_set_return_address (rtx source, rtx scratch)
24212 {
24213 arm_stack_offsets *offsets;
24214 HOST_WIDE_INT delta;
24215 HOST_WIDE_INT limit;
24216 int reg;
24217 rtx addr;
24218 unsigned long mask;
24219
24220 emit_use (source);
24221
24222 offsets = arm_get_frame_offsets ();
24223 mask = offsets->saved_regs_mask;
24224 if (mask & (1 << LR_REGNUM))
24225 {
24226 limit = 1024;
24227 /* Find the saved regs. */
24228 if (frame_pointer_needed)
24229 {
24230 delta = offsets->soft_frame - offsets->saved_args;
24231 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
24232 if (TARGET_THUMB1)
24233 limit = 128;
24234 }
24235 else
24236 {
24237 delta = offsets->outgoing_args - offsets->saved_args;
24238 reg = SP_REGNUM;
24239 }
24240 /* Allow for the stack frame. */
24241 if (TARGET_THUMB1 && TARGET_BACKTRACE)
24242 delta -= 16;
24243 /* The link register is always the first saved register. */
24244 delta -= 4;
24245
24246 /* Construct the address. */
24247 addr = gen_rtx_REG (SImode, reg);
24248 if (delta > limit)
24249 {
24250 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
24251 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
24252 addr = scratch;
24253 }
24254 else
24255 addr = plus_constant (Pmode, addr, delta);
24256
24257 emit_move_insn (gen_frame_mem (Pmode, addr), source);
24258 }
24259 else
24260 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
24261 }
24262
24263 /* Implements target hook vector_mode_supported_p. */
24264 bool
24265 arm_vector_mode_supported_p (enum machine_mode mode)
24266 {
24267 /* Neon also supports V2SImode, etc. listed in the clause below. */
24268 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
24269 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
24270 return true;
24271
24272 if ((TARGET_NEON || TARGET_IWMMXT)
24273 && ((mode == V2SImode)
24274 || (mode == V4HImode)
24275 || (mode == V8QImode)))
24276 return true;
24277
24278 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
24279 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
24280 || mode == V2HAmode))
24281 return true;
24282
24283 return false;
24284 }
24285
24286 /* Implements target hook array_mode_supported_p. */
24287
24288 static bool
24289 arm_array_mode_supported_p (enum machine_mode mode,
24290 unsigned HOST_WIDE_INT nelems)
24291 {
24292 if (TARGET_NEON
24293 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
24294 && (nelems >= 2 && nelems <= 4))
24295 return true;
24296
24297 return false;
24298 }
24299
24300 /* Use the option -mvectorize-with-neon-double to override the use of quadword
24301 registers when autovectorizing for Neon, at least until multiple vector
24302 widths are supported properly by the middle-end. */
24303
24304 static enum machine_mode
24305 arm_preferred_simd_mode (enum machine_mode mode)
24306 {
24307 if (TARGET_NEON)
24308 switch (mode)
24309 {
24310 case SFmode:
24311 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
24312 case SImode:
24313 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
24314 case HImode:
24315 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
24316 case QImode:
24317 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
24318 case DImode:
24319 if (!TARGET_NEON_VECTORIZE_DOUBLE)
24320 return V2DImode;
24321 break;
24322
24323 default:;
24324 }
24325
24326 if (TARGET_REALLY_IWMMXT)
24327 switch (mode)
24328 {
24329 case SImode:
24330 return V2SImode;
24331 case HImode:
24332 return V4HImode;
24333 case QImode:
24334 return V8QImode;
24335
24336 default:;
24337 }
24338
24339 return word_mode;
24340 }
24341
24342 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
24343
24344 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
24345 using r0-r4 for function arguments, r7 for the stack frame and don't have
24346 enough left over to do doubleword arithmetic. For Thumb-2 all the
24347 potentially problematic instructions accept high registers so this is not
24348 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
24349 that require many low registers. */
24350 static bool
24351 arm_class_likely_spilled_p (reg_class_t rclass)
24352 {
24353 if ((TARGET_THUMB1 && rclass == LO_REGS)
24354 || rclass == CC_REG)
24355 return true;
24356
24357 return false;
24358 }
24359
24360 /* Implements target hook small_register_classes_for_mode_p. */
24361 bool
24362 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
24363 {
24364 return TARGET_THUMB1;
24365 }
24366
24367 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
24368 ARM insns and therefore guarantee that the shift count is modulo 256.
24369 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
24370 guarantee no particular behavior for out-of-range counts. */
24371
24372 static unsigned HOST_WIDE_INT
24373 arm_shift_truncation_mask (enum machine_mode mode)
24374 {
24375 return mode == SImode ? 255 : 0;
24376 }
24377
24378
24379 /* Map internal gcc register numbers to DWARF2 register numbers. */
24380
24381 unsigned int
24382 arm_dbx_register_number (unsigned int regno)
24383 {
24384 if (regno < 16)
24385 return regno;
24386
24387 if (IS_VFP_REGNUM (regno))
24388 {
24389 /* See comment in arm_dwarf_register_span. */
24390 if (VFP_REGNO_OK_FOR_SINGLE (regno))
24391 return 64 + regno - FIRST_VFP_REGNUM;
24392 else
24393 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
24394 }
24395
24396 if (IS_IWMMXT_GR_REGNUM (regno))
24397 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
24398
24399 if (IS_IWMMXT_REGNUM (regno))
24400 return 112 + regno - FIRST_IWMMXT_REGNUM;
24401
24402 gcc_unreachable ();
24403 }
24404
24405 /* Dwarf models VFPv3 registers as 32 64-bit registers.
24406 GCC models them as 64 32-bit registers, so we need to describe this to
24407 the DWARF generation code. Other registers can use the default. */
24408 static rtx
24409 arm_dwarf_register_span (rtx rtl)
24410 {
24411 unsigned regno;
24412 int nregs;
24413 int i;
24414 rtx p;
24415
24416 regno = REGNO (rtl);
24417 if (!IS_VFP_REGNUM (regno))
24418 return NULL_RTX;
24419
24420 /* XXX FIXME: The EABI defines two VFP register ranges:
24421 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
24422 256-287: D0-D31
24423 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
24424 corresponding D register. Until GDB supports this, we shall use the
24425 legacy encodings. We also use these encodings for D0-D15 for
24426 compatibility with older debuggers. */
24427 if (VFP_REGNO_OK_FOR_SINGLE (regno))
24428 return NULL_RTX;
24429
24430 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
24431 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
24432 regno = (regno - FIRST_VFP_REGNUM) / 2;
24433 for (i = 0; i < nregs; i++)
24434 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
24435
24436 return p;
24437 }
24438
24439 #if ARM_UNWIND_INFO
24440 /* Emit unwind directives for a store-multiple instruction or stack pointer
24441 push during alignment.
24442 These should only ever be generated by the function prologue code, so
24443 expect them to have a particular form. */
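/* For example, a prologue push of {r4, r5, lr} (an SP decrement of 12
   followed by three register stores) produces
       .save {r4, r5, lr}
   while a push of VFP D registers produces a .vsave directive, and an
   extra push of pc (which is never restored) is folded into ".pad #4".
   (Illustrative example, derived from the code below.)  */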
24444
24445 static void
24446 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
24447 {
24448 int i;
24449 HOST_WIDE_INT offset;
24450 HOST_WIDE_INT nregs;
24451 int reg_size;
24452 unsigned reg;
24453 unsigned lastreg;
24454 rtx e;
24455
24456 e = XVECEXP (p, 0, 0);
24457 if (GET_CODE (e) != SET)
24458 abort ();
24459
24460 /* First insn will adjust the stack pointer. */
24461 if (GET_CODE (e) != SET
24462 || !REG_P (XEXP (e, 0))
24463 || REGNO (XEXP (e, 0)) != SP_REGNUM
24464 || GET_CODE (XEXP (e, 1)) != PLUS)
24465 abort ();
24466
24467 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
24468 nregs = XVECLEN (p, 0) - 1;
24469
24470 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
24471 if (reg < 16)
24472 {
24473 /* The function prologue may also push pc, but not annotate it as it is
24474 never restored. We turn this into a stack pointer adjustment. */
24475 if (nregs * 4 == offset - 4)
24476 {
24477 fprintf (asm_out_file, "\t.pad #4\n");
24478 offset -= 4;
24479 }
24480 reg_size = 4;
24481 fprintf (asm_out_file, "\t.save {");
24482 }
24483 else if (IS_VFP_REGNUM (reg))
24484 {
24485 reg_size = 8;
24486 fprintf (asm_out_file, "\t.vsave {");
24487 }
24488 else
24489 /* Unknown register type. */
24490 abort ();
24491
24492 /* If the stack increment doesn't match the size of the saved registers,
24493 something has gone horribly wrong. */
24494 if (offset != nregs * reg_size)
24495 abort ();
24496
24497 offset = 0;
24498 lastreg = 0;
24499 /* The remaining insns will describe the stores. */
24500 for (i = 1; i <= nregs; i++)
24501 {
24502 /* Expect (set (mem <addr>) (reg)).
24503 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
24504 e = XVECEXP (p, 0, i);
24505 if (GET_CODE (e) != SET
24506 || !MEM_P (XEXP (e, 0))
24507 || !REG_P (XEXP (e, 1)))
24508 abort ();
24509
24510 reg = REGNO (XEXP (e, 1));
24511 if (reg < lastreg)
24512 abort ();
24513
24514 if (i != 1)
24515 fprintf (asm_out_file, ", ");
24516 /* We can't use %r for vfp because we need to use the
24517 double precision register names. */
24518 if (IS_VFP_REGNUM (reg))
24519 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
24520 else
24521 asm_fprintf (asm_out_file, "%r", reg);
24522
24523 #ifdef ENABLE_CHECKING
24524 /* Check that the addresses are consecutive. */
24525 e = XEXP (XEXP (e, 0), 0);
24526 if (GET_CODE (e) == PLUS)
24527 {
24528 offset += reg_size;
24529 if (!REG_P (XEXP (e, 0))
24530 || REGNO (XEXP (e, 0)) != SP_REGNUM
24531 || !CONST_INT_P (XEXP (e, 1))
24532 || offset != INTVAL (XEXP (e, 1)))
24533 abort ();
24534 }
24535 else if (i != 1
24536 || !REG_P (e)
24537 || REGNO (e) != SP_REGNUM)
24538 abort ();
24539 #endif
24540 }
24541 fprintf (asm_out_file, "}\n");
24542 }
24543
24544 /* Emit unwind directives for a SET. */
24545
24546 static void
24547 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
24548 {
24549 rtx e0;
24550 rtx e1;
24551 unsigned reg;
24552
24553 e0 = XEXP (p, 0);
24554 e1 = XEXP (p, 1);
24555 switch (GET_CODE (e0))
24556 {
24557 case MEM:
24558 /* Pushing a single register. */
24559 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
24560 || !REG_P (XEXP (XEXP (e0, 0), 0))
24561 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
24562 abort ();
24563
24564 asm_fprintf (asm_out_file, "\t.save ");
24565 if (IS_VFP_REGNUM (REGNO (e1)))
24566 asm_fprintf(asm_out_file, "{d%d}\n",
24567 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
24568 else
24569 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
24570 break;
24571
24572 case REG:
24573 if (REGNO (e0) == SP_REGNUM)
24574 {
24575 /* A stack increment. */
24576 if (GET_CODE (e1) != PLUS
24577 || !REG_P (XEXP (e1, 0))
24578 || REGNO (XEXP (e1, 0)) != SP_REGNUM
24579 || !CONST_INT_P (XEXP (e1, 1)))
24580 abort ();
24581
24582 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
24583 -INTVAL (XEXP (e1, 1)));
24584 }
24585 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
24586 {
24587 HOST_WIDE_INT offset;
24588
24589 if (GET_CODE (e1) == PLUS)
24590 {
24591 if (!REG_P (XEXP (e1, 0))
24592 || !CONST_INT_P (XEXP (e1, 1)))
24593 abort ();
24594 reg = REGNO (XEXP (e1, 0));
24595 offset = INTVAL (XEXP (e1, 1));
24596 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
24597 HARD_FRAME_POINTER_REGNUM, reg,
24598 offset);
24599 }
24600 else if (REG_P (e1))
24601 {
24602 reg = REGNO (e1);
24603 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
24604 HARD_FRAME_POINTER_REGNUM, reg);
24605 }
24606 else
24607 abort ();
24608 }
24609 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
24610 {
24611 /* Move from sp to reg. */
24612 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
24613 }
24614 else if (GET_CODE (e1) == PLUS
24615 && REG_P (XEXP (e1, 0))
24616 && REGNO (XEXP (e1, 0)) == SP_REGNUM
24617 && CONST_INT_P (XEXP (e1, 1)))
24618 {
24619 /* Set reg to offset from sp. */
24620 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
24621 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
24622 }
24623 else
24624 abort ();
24625 break;
24626
24627 default:
24628 abort ();
24629 }
24630 }
24631
24632
24633 /* Emit unwind directives for the given insn. */
24634
24635 static void
24636 arm_unwind_emit (FILE * asm_out_file, rtx insn)
24637 {
24638 rtx note, pat;
24639 bool handled_one = false;
24640
24641 if (arm_except_unwind_info (&global_options) != UI_TARGET)
24642 return;
24643
24644 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
24645 && (TREE_NOTHROW (current_function_decl)
24646 || crtl->all_throwers_are_sibcalls))
24647 return;
24648
24649 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
24650 return;
24651
24652 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
24653 {
24654 pat = XEXP (note, 0);
24655 switch (REG_NOTE_KIND (note))
24656 {
24657 case REG_FRAME_RELATED_EXPR:
24658 goto found;
24659
24660 case REG_CFA_REGISTER:
24661 if (pat == NULL)
24662 {
24663 pat = PATTERN (insn);
24664 if (GET_CODE (pat) == PARALLEL)
24665 pat = XVECEXP (pat, 0, 0);
24666 }
24667
24668 /* Only emitted for IS_STACKALIGN re-alignment. */
24669 {
24670 rtx dest, src;
24671 unsigned reg;
24672
24673 src = SET_SRC (pat);
24674 dest = SET_DEST (pat);
24675
24676 gcc_assert (src == stack_pointer_rtx);
24677 reg = REGNO (dest);
24678 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
24679 reg + 0x90, reg);
24680 }
24681 handled_one = true;
24682 break;
24683
24684 case REG_CFA_DEF_CFA:
24685 case REG_CFA_EXPRESSION:
24686 case REG_CFA_ADJUST_CFA:
24687 case REG_CFA_OFFSET:
24688 /* ??? Only handling here what we actually emit. */
24689 gcc_unreachable ();
24690
24691 default:
24692 break;
24693 }
24694 }
24695 if (handled_one)
24696 return;
24697 pat = PATTERN (insn);
24698 found:
24699
24700 switch (GET_CODE (pat))
24701 {
24702 case SET:
24703 arm_unwind_emit_set (asm_out_file, pat);
24704 break;
24705
24706 case SEQUENCE:
24707 /* Store multiple. */
24708 arm_unwind_emit_sequence (asm_out_file, pat);
24709 break;
24710
24711 default:
24712 abort();
24713 }
24714 }
24715
24716
24717 /* Output a reference from a function exception table to the type_info
24718 object X. The EABI specifies that the symbol should be relocated by
24719 an R_ARM_TARGET2 relocation. */
24720
24721 static bool
24722 arm_output_ttype (rtx x)
24723 {
24724 fputs ("\t.word\t", asm_out_file);
24725 output_addr_const (asm_out_file, x);
24726 /* Use special relocations for symbol references. */
24727 if (!CONST_INT_P (x))
24728 fputs ("(TARGET2)", asm_out_file);
24729 fputc ('\n', asm_out_file);
24730
24731 return TRUE;
24732 }
24733
24734 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
24735
24736 static void
24737 arm_asm_emit_except_personality (rtx personality)
24738 {
24739 fputs ("\t.personality\t", asm_out_file);
24740 output_addr_const (asm_out_file, personality);
24741 fputc ('\n', asm_out_file);
24742 }
24743
24744 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
24745
24746 static void
24747 arm_asm_init_sections (void)
24748 {
24749 exception_section = get_unnamed_section (0, output_section_asm_op,
24750 "\t.handlerdata");
24751 }
24752 #endif /* ARM_UNWIND_INFO */
24753
24754 /* Output unwind directives for the start/end of a function. */
24755
24756 void
24757 arm_output_fn_unwind (FILE * f, bool prologue)
24758 {
24759 if (arm_except_unwind_info (&global_options) != UI_TARGET)
24760 return;
24761
24762 if (prologue)
24763 fputs ("\t.fnstart\n", f);
24764 else
24765 {
24766 /* If this function will never be unwound, then mark it as such.
24767 The same condition is used in arm_unwind_emit to suppress
24768 the frame annotations. */
24769 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
24770 && (TREE_NOTHROW (current_function_decl)
24771 || crtl->all_throwers_are_sibcalls))
24772 fputs("\t.cantunwind\n", f);
24773
24774 fputs ("\t.fnend\n", f);
24775 }
24776 }
24777
24778 static bool
24779 arm_emit_tls_decoration (FILE *fp, rtx x)
24780 {
24781 enum tls_reloc reloc;
24782 rtx val;
24783
24784 val = XVECEXP (x, 0, 0);
24785 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
24786
24787 output_addr_const (fp, val);
24788
24789 switch (reloc)
24790 {
24791 case TLS_GD32:
24792 fputs ("(tlsgd)", fp);
24793 break;
24794 case TLS_LDM32:
24795 fputs ("(tlsldm)", fp);
24796 break;
24797 case TLS_LDO32:
24798 fputs ("(tlsldo)", fp);
24799 break;
24800 case TLS_IE32:
24801 fputs ("(gottpoff)", fp);
24802 break;
24803 case TLS_LE32:
24804 fputs ("(tpoff)", fp);
24805 break;
24806 case TLS_DESCSEQ:
24807 fputs ("(tlsdesc)", fp);
24808 break;
24809 default:
24810 gcc_unreachable ();
24811 }
24812
24813 switch (reloc)
24814 {
24815 case TLS_GD32:
24816 case TLS_LDM32:
24817 case TLS_IE32:
24818 case TLS_DESCSEQ:
24819 fputs (" + (. - ", fp);
24820 output_addr_const (fp, XVECEXP (x, 0, 2));
24821 /* For DESCSEQ the 3rd operand encodes thumbness and is added. */
24822 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
24823 output_addr_const (fp, XVECEXP (x, 0, 3));
24824 fputc (')', fp);
24825 break;
24826 default:
24827 break;
24828 }
24829
24830 return TRUE;
24831 }
24832
24833 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
24834
24835 static void
24836 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
24837 {
24838 gcc_assert (size == 4);
24839 fputs ("\t.word\t", file);
24840 output_addr_const (file, x);
24841 fputs ("(tlsldo)", file);
24842 }
24843
24844 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
24845
24846 static bool
24847 arm_output_addr_const_extra (FILE *fp, rtx x)
24848 {
24849 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
24850 return arm_emit_tls_decoration (fp, x);
24851 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
24852 {
24853 char label[256];
24854 int labelno = INTVAL (XVECEXP (x, 0, 0));
24855
24856 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
24857 assemble_name_raw (fp, label);
24858
24859 return TRUE;
24860 }
24861 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
24862 {
24863 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
24864 if (GOT_PCREL)
24865 fputs ("+.", fp);
24866 fputs ("-(", fp);
24867 output_addr_const (fp, XVECEXP (x, 0, 0));
24868 fputc (')', fp);
24869 return TRUE;
24870 }
24871 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
24872 {
24873 output_addr_const (fp, XVECEXP (x, 0, 0));
24874 if (GOT_PCREL)
24875 fputs ("+.", fp);
24876 fputs ("-(", fp);
24877 output_addr_const (fp, XVECEXP (x, 0, 1));
24878 fputc (')', fp);
24879 return TRUE;
24880 }
24881 else if (GET_CODE (x) == CONST_VECTOR)
24882 return arm_emit_vector_const (fp, x);
24883
24884 return FALSE;
24885 }
24886
24887 /* Output assembly for a shift instruction.
24888 SET_FLAGS determines how the instruction modifies the condition codes.
24889 0 - Do not set condition codes.
24890 1 - Set condition codes.
24891 2 - Use smallest instruction. */
24892 const char *
24893 arm_output_shift(rtx * operands, int set_flags)
24894 {
24895 char pattern[100];
24896 static const char flag_chars[3] = {'?', '.', '!'};
24897 const char *shift;
24898 HOST_WIDE_INT val;
24899 char c;
24900
24901 c = flag_chars[set_flags];
24902 if (TARGET_UNIFIED_ASM)
24903 {
24904 shift = shift_op(operands[3], &val);
24905 if (shift)
24906 {
24907 if (val != -1)
24908 operands[2] = GEN_INT(val);
24909 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
24910 }
24911 else
24912 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
24913 }
24914 else
24915 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
24916 output_asm_insn (pattern, operands);
24917 return "";
24918 }
24919
24920 /* Output assembly for a WMMX immediate shift instruction. */
24921 const char *
24922 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
24923 {
24924 int shift = INTVAL (operands[2]);
24925 char templ[50];
24926 enum machine_mode opmode = GET_MODE (operands[0]);
24927
24928 gcc_assert (shift >= 0);
24929
24930 /* If the shift value in the register versions exceeds 63 (for the D qualifier),
24931 31 (for W) or 15 (for H), emit a full-width shift for wror/wsra, or clear the destination with wzero. */
24932 if (((opmode == V4HImode) && (shift > 15))
24933 || ((opmode == V2SImode) && (shift > 31))
24934 || ((opmode == DImode) && (shift > 63)))
24935 {
24936 if (wror_or_wsra)
24937 {
24938 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
24939 output_asm_insn (templ, operands);
24940 if (opmode == DImode)
24941 {
24942 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
24943 output_asm_insn (templ, operands);
24944 }
24945 }
24946 else
24947 {
24948 /* The destination register will contain all zeros. */
24949 sprintf (templ, "wzero\t%%0");
24950 output_asm_insn (templ, operands);
24951 }
24952 return "";
24953 }
24954
24955 if ((opmode == DImode) && (shift > 32))
24956 {
24957 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
24958 output_asm_insn (templ, operands);
24959 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
24960 output_asm_insn (templ, operands);
24961 }
24962 else
24963 {
24964 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
24965 output_asm_insn (templ, operands);
24966 }
24967 return "";
24968 }
24969
24970 /* Output assembly for a WMMX tinsr instruction. */
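/* operands[3] is a one-hot mask selecting which element of the vector in
   operands[0] receives the scalar in operands[2]; for instance a mask of
   0x4 on a V8QImode operand yields roughly "tinsrb wRd, rN, #2"
   (operand names illustrative).  */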
24971 const char *
24972 arm_output_iwmmxt_tinsr (rtx *operands)
24973 {
24974 int mask = INTVAL (operands[3]);
24975 int i;
24976 char templ[50];
24977 int units = mode_nunits[GET_MODE (operands[0])];
24978 gcc_assert ((mask & (mask - 1)) == 0);
24979 for (i = 0; i < units; ++i)
24980 {
24981 if ((mask & 0x01) == 1)
24982 {
24983 break;
24984 }
24985 mask >>= 1;
24986 }
24987 gcc_assert (i < units);
24988 {
24989 switch (GET_MODE (operands[0]))
24990 {
24991 case V8QImode:
24992 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
24993 break;
24994 case V4HImode:
24995 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
24996 break;
24997 case V2SImode:
24998 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
24999 break;
25000 default:
25001 gcc_unreachable ();
25002 break;
25003 }
25004 output_asm_insn (templ, operands);
25005 }
25006 return "";
25007 }
25008
25009 /* Output a Thumb-1 casesi dispatch sequence. */
25010 const char *
25011 thumb1_output_casesi (rtx *operands)
25012 {
25013 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
25014
25015 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
25016
25017 switch (GET_MODE(diff_vec))
25018 {
25019 case QImode:
25020 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
25021 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
25022 case HImode:
25023 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
25024 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
25025 case SImode:
25026 return "bl\t%___gnu_thumb1_case_si";
25027 default:
25028 gcc_unreachable ();
25029 }
25030 }
25031
25032 /* Output a Thumb-2 casesi instruction. */
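/* A sketch of the emitted dispatch for a byte-sized table:
       cmp  rIdx, rRange
       bhi  .Ldefault
       tbb  [pc, rIdx]
   with tbh used for HImode tables and an adr/ldr sequence (plus add/bx
   for PIC) for SImode tables; register and label names here are
   illustrative.  */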
25033 const char *
25034 thumb2_output_casesi (rtx *operands)
25035 {
25036 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
25037
25038 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
25039
25040 output_asm_insn ("cmp\t%0, %1", operands);
25041 output_asm_insn ("bhi\t%l3", operands);
25042 switch (GET_MODE(diff_vec))
25043 {
25044 case QImode:
25045 return "tbb\t[%|pc, %0]";
25046 case HImode:
25047 return "tbh\t[%|pc, %0, lsl #1]";
25048 case SImode:
25049 if (flag_pic)
25050 {
25051 output_asm_insn ("adr\t%4, %l2", operands);
25052 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
25053 output_asm_insn ("add\t%4, %4, %5", operands);
25054 return "bx\t%4";
25055 }
25056 else
25057 {
25058 output_asm_insn ("adr\t%4, %l2", operands);
25059 return "ldr\t%|pc, [%4, %0, lsl #2]";
25060 }
25061 default:
25062 gcc_unreachable ();
25063 }
25064 }
25065
25066 /* Most ARM cores are single issue, but some newer ones can dual issue.
25067 The scheduler descriptions rely on this being correct. */
25068 static int
25069 arm_issue_rate (void)
25070 {
25071 switch (arm_tune)
25072 {
25073 case cortexa15:
25074 return 3;
25075
25076 case cortexr4:
25077 case cortexr4f:
25078 case cortexr5:
25079 case genericv7a:
25080 case cortexa5:
25081 case cortexa8:
25082 case cortexa9:
25083 case fa726te:
25084 return 2;
25085
25086 default:
25087 return 1;
25088 }
25089 }
25090
25091 /* A table and a function to perform ARM-specific name mangling for
25092 NEON vector types in order to conform to the AAPCS (see "Procedure
25093 Call Standard for the ARM Architecture", Appendix A). To qualify
25094 for emission with the mangled names defined in that document, a
25095 vector type must not only be of the correct mode but also be
25096 composed of NEON vector element types (e.g. __builtin_neon_qi). */
25097 typedef struct
25098 {
25099 enum machine_mode mode;
25100 const char *element_type_name;
25101 const char *aapcs_name;
25102 } arm_mangle_map_entry;
25103
25104 static arm_mangle_map_entry arm_mangle_map[] = {
25105 /* 64-bit containerized types. */
25106 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
25107 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
25108 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
25109 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
25110 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
25111 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
25112 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
25113 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
25114 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
25115 /* 128-bit containerized types. */
25116 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
25117 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
25118 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
25119 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
25120 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
25121 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
25122 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
25123 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
25124 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
25125 { VOIDmode, NULL, NULL }
25126 };
25127
25128 const char *
25129 arm_mangle_type (const_tree type)
25130 {
25131 arm_mangle_map_entry *pos = arm_mangle_map;
25132
25133 /* The ARM ABI documents (10th October 2008) say that "__va_list"
25134 has to be mangled as if it is in the "std" namespace. */
25135 if (TARGET_AAPCS_BASED
25136 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
25137 return "St9__va_list";
25138
25139 /* Half-precision float. */
25140 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
25141 return "Dh";
25142
25143 if (TREE_CODE (type) != VECTOR_TYPE)
25144 return NULL;
25145
25146 /* Check the mode of the vector type, and the name of the vector
25147 element type, against the table. */
25148 while (pos->mode != VOIDmode)
25149 {
25150 tree elt_type = TREE_TYPE (type);
25151
25152 if (pos->mode == TYPE_MODE (type)
25153 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
25154 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
25155 pos->element_type_name))
25156 return pos->aapcs_name;
25157
25158 pos++;
25159 }
25160
25161 /* Use the default mangling for unrecognized (possibly user-defined)
25162 vector types. */
25163 return NULL;
25164 }
25165
25166 /* Order of allocation of core registers for Thumb: this allocation is
25167 written over the corresponding initial entries of the array
25168 initialized with REG_ALLOC_ORDER. We allocate all low registers
25169 first. Saving and restoring a low register is usually cheaper than
25170 using a call-clobbered high register. */
25171
25172 static const int thumb_core_reg_alloc_order[] =
25173 {
25174 3, 2, 1, 0, 4, 5, 6, 7,
25175 14, 12, 8, 9, 10, 11
25176 };
25177
25178 /* Adjust register allocation order when compiling for Thumb. */
25179
25180 void
25181 arm_order_regs_for_local_alloc (void)
25182 {
25183 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
25184 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
25185 if (TARGET_THUMB)
25186 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
25187 sizeof (thumb_core_reg_alloc_order));
25188 }
25189
25190 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
25191
25192 bool
25193 arm_frame_pointer_required (void)
25194 {
25195 return (cfun->has_nonlocal_label
25196 || SUBTARGET_FRAME_POINTER_REQUIRED
25197 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
25198 }
25199
25200 /* Only Thumb-1 lacks conditional execution, so return true if
25201 the target is not Thumb-1. */
25202 static bool
25203 arm_have_conditional_execution (void)
25204 {
25205 return !TARGET_THUMB1;
25206 }
25207
25208 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
25209 static HOST_WIDE_INT
25210 arm_vector_alignment (const_tree type)
25211 {
25212 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
25213
25214 if (TARGET_AAPCS_BASED)
25215 align = MIN (align, 64);
25216
25217 return align;
25218 }
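/* For example (illustrative, not in the original source): a 128-bit NEON
   vector type whose TYPE_SIZE is 128 would normally be given 128-bit
   alignment, but when TARGET_AAPCS_BASED the value is capped at 64 bits,
   as the AAPCS requires.  */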
25219
25220 static unsigned int
25221 arm_autovectorize_vector_sizes (void)
25222 {
25223 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
25224 }
25225
25226 static bool
25227 arm_vector_alignment_reachable (const_tree type, bool is_packed)
25228 {
25229 /* Vectors which aren't in packed structures will not be less aligned than
25230 the natural alignment of their element type, so this is safe. */
25231 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
25232 return !is_packed;
25233
25234 return default_builtin_vector_alignment_reachable (type, is_packed);
25235 }
25236
25237 static bool
25238 arm_builtin_support_vector_misalignment (enum machine_mode mode,
25239 const_tree type, int misalignment,
25240 bool is_packed)
25241 {
25242 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
25243 {
25244 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
25245
25246 if (is_packed)
25247 return align == 1;
25248
25249 /* If the misalignment is unknown, we should be able to handle the access
25250 so long as it is not to a member of a packed data structure. */
25251 if (misalignment == -1)
25252 return true;
25253
25254 /* Return true if the misalignment is a multiple of the natural alignment
25255 of the vector's element type. This is probably always going to be
25256 true in practice, since we've already established that this isn't a
25257 packed access. */
25258 return ((misalignment % align) == 0);
25259 }
25260
25261 return default_builtin_support_vector_misalignment (mode, type, misalignment,
25262 is_packed);
25263 }
25264
25265 static void
25266 arm_conditional_register_usage (void)
25267 {
25268 int regno;
25269
25270 if (TARGET_THUMB1 && optimize_size)
25271 {
25272 /* When optimizing for size on Thumb-1, it's better not
25273 to use the HI regs, because of the overhead of
25274 stacking them. */
25275 for (regno = FIRST_HI_REGNUM;
25276 regno <= LAST_HI_REGNUM; ++regno)
25277 fixed_regs[regno] = call_used_regs[regno] = 1;
25278 }
25279
25280 /* The link register can be clobbered by any branch insn,
25281 but we have no way to track that at present, so mark
25282 it as unavailable. */
25283 if (TARGET_THUMB1)
25284 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
25285
25286 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
25287 {
25288 /* VFPv3 registers are disabled when earlier VFP
25289 versions are selected due to the definition of
25290 LAST_VFP_REGNUM. */
25291 for (regno = FIRST_VFP_REGNUM;
25292 regno <= LAST_VFP_REGNUM; ++ regno)
25293 {
25294 fixed_regs[regno] = 0;
25295 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
25296 || regno >= FIRST_VFP_REGNUM + 32;
25297 }
25298 }
25299
25300 if (TARGET_REALLY_IWMMXT)
25301 {
25302 regno = FIRST_IWMMXT_GR_REGNUM;
25303 /* The 2002/10/09 revision of the XScale ABI has wCG0
25304 and wCG1 as call-preserved registers. The 2002/11/21
25305 revision changed this so that all wCG registers are
25306 scratch registers. */
25307 for (regno = FIRST_IWMMXT_GR_REGNUM;
25308 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
25309 fixed_regs[regno] = 0;
25310 /* The XScale ABI has wR0 - wR9 as scratch registers,
25311 the rest as call-preserved registers. */
25312 for (regno = FIRST_IWMMXT_REGNUM;
25313 regno <= LAST_IWMMXT_REGNUM; ++ regno)
25314 {
25315 fixed_regs[regno] = 0;
25316 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
25317 }
25318 }
25319
25320 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
25321 {
25322 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
25323 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
25324 }
25325 else if (TARGET_APCS_STACK)
25326 {
25327 fixed_regs[10] = 1;
25328 call_used_regs[10] = 1;
25329 }
25330 /* -mcaller-super-interworking reserves r11 for calls to
25331 _interwork_r11_call_via_rN(). Making the register global
25332 is an easy way of ensuring that it remains valid for all
25333 calls. */
25334 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
25335 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
25336 {
25337 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25338 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25339 if (TARGET_CALLER_INTERWORKING)
25340 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25341 }
25342 SUBTARGET_CONDITIONAL_REGISTER_USAGE
25343 }
25344
25345 static reg_class_t
25346 arm_preferred_rename_class (reg_class_t rclass)
25347 {
25348 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
25349 using GENERAL_REGS. During the register rename pass we therefore prefer
25350 LO_REGS, which can reduce code size. */
25351 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
25352 return LO_REGS;
25353 else
25354 return NO_REGS;
25355 }
25356
25357 /* Compute the attribute "length" of insn "*push_multi".
25358 This function MUST therefore be kept in sync with that insn pattern. */
25359 int
25360 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
25361 {
25362 int i, regno, hi_reg;
25363 int num_saves = XVECLEN (parallel_op, 0);
25364
25365 /* ARM mode. */
25366 if (TARGET_ARM)
25367 return 4;
25368 /* Thumb1 mode. */
25369 if (TARGET_THUMB1)
25370 return 2;
25371
25372 /* Thumb2 mode. */
25373 regno = REGNO (first_op);
25374 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
25375 for (i = 1; i < num_saves && !hi_reg; i++)
25376 {
25377 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
25378 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
25379 }
25380
25381 if (!hi_reg)
25382 return 2;
25383 return 4;
25384 }
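/* Illustrative examples (not part of the original source), assuming Thumb-2:
   "push {r4-r7, lr}" uses only low registers plus LR, so the 16-bit encoding
   is available and the function returns 2; "push {r4, r8}" includes a high
   register other than LR, so a 32-bit encoding is needed and the function
   returns 4.  In ARM state every form is 4 bytes.  */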
25385
25386 /* Compute the number of instructions emitted by output_move_double. */
25387 int
25388 arm_count_output_move_double_insns (rtx *operands)
25389 {
25390 int count;
25391 rtx ops[2];
25392 /* output_move_double may modify the operands array, so call it
25393 here on a copy of the array. */
25394 ops[0] = operands[0];
25395 ops[1] = operands[1];
25396 output_move_double (ops, false, &count);
25397 return count;
25398 }
25399
25400 int
25401 vfp3_const_double_for_fract_bits (rtx operand)
25402 {
25403 REAL_VALUE_TYPE r0;
25404
25405 if (!CONST_DOUBLE_P (operand))
25406 return 0;
25407
25408 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
25409 if (exact_real_inverse (DFmode, &r0))
25410 {
25411 if (exact_real_truncate (DFmode, &r0))
25412 {
25413 HOST_WIDE_INT value = real_to_integer (&r0);
25414 value = value & 0xffffffff;
25415 if ((value != 0) && ( (value & (value - 1)) == 0))
25416 return int_log2 (value);
25417 }
25418 }
25419 return 0;
25420 }
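/* A minimal host-side sketch (illustrative only, not part of the backend)
   of the test above: a constant such as 0.0625 has the exact inverse 16.0,
   which truncates exactly to the power of two 16, so the routine returns
   log2 (16) = 4 -- the #fbits operand of a fixed-point VCVT.  The helper
   below assumes plain IEEE doubles on the host; its name is hypothetical.  */

static int
fract_bits_of_double (double d)
{
  double inv;
  unsigned long long v;
  int log = 0;

  if (d == 0.0)
    return 0;

  inv = 1.0 / d;
  v = (unsigned long long) inv;

  /* Reject values whose inverse is not an exact power of two.  */
  if ((double) v != inv || v == 0 || (v & (v - 1)) != 0)
    return 0;

  while (v >>= 1)
    log++;
  return log;
}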
25421 \f
25422 /* Emit a memory barrier around an atomic sequence according to MODEL. */
25423
25424 static void
25425 arm_pre_atomic_barrier (enum memmodel model)
25426 {
25427 if (need_atomic_barrier_p (model, true))
25428 emit_insn (gen_memory_barrier ());
25429 }
25430
25431 static void
25432 arm_post_atomic_barrier (enum memmodel model)
25433 {
25434 if (need_atomic_barrier_p (model, false))
25435 emit_insn (gen_memory_barrier ());
25436 }
25437
25438 /* Emit the load-exclusive and store-exclusive instructions. */
25439
25440 static void
25441 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem)
25442 {
25443 rtx (*gen) (rtx, rtx);
25444
25445 switch (mode)
25446 {
25447 case QImode: gen = gen_arm_load_exclusiveqi; break;
25448 case HImode: gen = gen_arm_load_exclusivehi; break;
25449 case SImode: gen = gen_arm_load_exclusivesi; break;
25450 case DImode: gen = gen_arm_load_exclusivedi; break;
25451 default:
25452 gcc_unreachable ();
25453 }
25454
25455 emit_insn (gen (rval, mem));
25456 }
25457
25458 static void
25459 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem)
25460 {
25461 rtx (*gen) (rtx, rtx, rtx);
25462
25463 switch (mode)
25464 {
25465 case QImode: gen = gen_arm_store_exclusiveqi; break;
25466 case HImode: gen = gen_arm_store_exclusivehi; break;
25467 case SImode: gen = gen_arm_store_exclusivesi; break;
25468 case DImode: gen = gen_arm_store_exclusivedi; break;
25469 default:
25470 gcc_unreachable ();
25471 }
25472
25473 emit_insn (gen (bval, rval, mem));
25474 }
25475
25476 /* Mark the previous jump instruction as unlikely. */
25477
25478 static void
25479 emit_unlikely_jump (rtx insn)
25480 {
25481 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
25482
25483 insn = emit_jump_insn (insn);
25484 add_reg_note (insn, REG_BR_PROB, very_unlikely);
25485 }
25486
25487 /* Expand a compare and swap pattern. */
25488
25489 void
25490 arm_expand_compare_and_swap (rtx operands[])
25491 {
25492 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
25493 enum machine_mode mode;
25494 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
25495
25496 bval = operands[0];
25497 rval = operands[1];
25498 mem = operands[2];
25499 oldval = operands[3];
25500 newval = operands[4];
25501 is_weak = operands[5];
25502 mod_s = operands[6];
25503 mod_f = operands[7];
25504 mode = GET_MODE (mem);
25505
25506 switch (mode)
25507 {
25508 case QImode:
25509 case HImode:
25510 /* For narrow modes, we're going to perform the comparison in SImode,
25511 so do the zero-extension now. */
25512 rval = gen_reg_rtx (SImode);
25513 oldval = convert_modes (SImode, mode, oldval, true);
25514 /* FALLTHRU */
25515
25516 case SImode:
25517 /* Force the value into a register if needed. We waited until after
25518 the zero-extension above to do this properly. */
25519 if (!arm_add_operand (oldval, SImode))
25520 oldval = force_reg (SImode, oldval);
25521 break;
25522
25523 case DImode:
25524 if (!cmpdi_operand (oldval, mode))
25525 oldval = force_reg (mode, oldval);
25526 break;
25527
25528 default:
25529 gcc_unreachable ();
25530 }
25531
25532 switch (mode)
25533 {
25534 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
25535 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
25536 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
25537 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
25538 default:
25539 gcc_unreachable ();
25540 }
25541
25542 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
25543
25544 if (mode == QImode || mode == HImode)
25545 emit_move_insn (operands[1], gen_lowpart (mode, rval));
25546
25547 /* In all cases, we arrange for success to be signaled by Z set.
25548 This arrangement allows for the boolean result to be used directly
25549 in a subsequent branch, post optimization. */
25550 x = gen_rtx_REG (CCmode, CC_REGNUM);
25551 x = gen_rtx_EQ (SImode, x, const0_rtx);
25552 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
25553 }
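/* Illustrative example (not part of the original source): a source-level
   call such as

     ok = __atomic_compare_exchange_n (&val, &expected, desired,
                                       0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);

   (WEAK = 0, i.e. a strong exchange) reaches this expander through the
   atomic_compare_and_swap<mode> pattern; because success is signaled by Z
   being set, OK can feed a subsequent conditional branch with no extra
   comparison.  "val", "expected", "desired" and "ok" are example names.  */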
25554
25555 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
25556 another memory store between the load-exclusive and store-exclusive can
25557 reset the monitor from Exclusive to Open state. This means we must wait
25558 until after reload to split the pattern, lest we get a register spill in
25559 the middle of the atomic sequence. */
25560
25561 void
25562 arm_split_compare_and_swap (rtx operands[])
25563 {
25564 rtx rval, mem, oldval, newval, scratch;
25565 enum machine_mode mode;
25566 enum memmodel mod_s, mod_f;
25567 bool is_weak;
25568 rtx label1, label2, x, cond;
25569
25570 rval = operands[0];
25571 mem = operands[1];
25572 oldval = operands[2];
25573 newval = operands[3];
25574 is_weak = (operands[4] != const0_rtx);
25575 mod_s = (enum memmodel) INTVAL (operands[5]);
25576 mod_f = (enum memmodel) INTVAL (operands[6]);
25577 scratch = operands[7];
25578 mode = GET_MODE (mem);
25579
25580 arm_pre_atomic_barrier (mod_s);
25581
25582 label1 = NULL_RTX;
25583 if (!is_weak)
25584 {
25585 label1 = gen_label_rtx ();
25586 emit_label (label1);
25587 }
25588 label2 = gen_label_rtx ();
25589
25590 arm_emit_load_exclusive (mode, rval, mem);
25591
25592 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
25593 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25594 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
25595 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
25596 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
25597
25598 arm_emit_store_exclusive (mode, scratch, mem, newval);
25599
25600 /* Weak or strong, we want EQ to be true for success, so that we
25601 match the flags that we got from the compare above. */
25602 cond = gen_rtx_REG (CCmode, CC_REGNUM);
25603 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
25604 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
25605
25606 if (!is_weak)
25607 {
25608 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25609 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
25610 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
25611 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
25612 }
25613
25614 if (mod_f != MEMMODEL_RELAXED)
25615 emit_label (label2);
25616
25617 arm_post_atomic_barrier (mod_s);
25618
25619 if (mod_f == MEMMODEL_RELAXED)
25620 emit_label (label2);
25621 }
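/* Roughly, for a strong SImode compare-and-swap with MEMMODEL_SEQ_CST this
   split produces a sequence of the following shape (illustrative only;
   register names and the exact barrier depend on the target):

        dmb     sy                      @ arm_pre_atomic_barrier
     1: ldrex   rval, [mem]
        cmp     rval, oldval
        bne     2f                      @ marked unlikely
        strex   scratch, newval, [mem]
        cmp     scratch, #0
        bne     1b                      @ retry, marked unlikely
     2: dmb     sy                      @ arm_post_atomic_barrier
   */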
25622
25623 void
25624 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
25625 rtx value, rtx model_rtx, rtx cond)
25626 {
25627 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
25628 enum machine_mode mode = GET_MODE (mem);
25629 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
25630 rtx label, x;
25631
25632 arm_pre_atomic_barrier (model);
25633
25634 label = gen_label_rtx ();
25635 emit_label (label);
25636
25637 if (new_out)
25638 new_out = gen_lowpart (wmode, new_out);
25639 if (old_out)
25640 old_out = gen_lowpart (wmode, old_out);
25641 else
25642 old_out = new_out;
25643 value = simplify_gen_subreg (wmode, value, mode, 0);
25644
25645 arm_emit_load_exclusive (mode, old_out, mem);
25646
25647 switch (code)
25648 {
25649 case SET:
25650 new_out = value;
25651 break;
25652
25653 case NOT:
25654 x = gen_rtx_AND (wmode, old_out, value);
25655 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25656 x = gen_rtx_NOT (wmode, new_out);
25657 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25658 break;
25659
25660 case MINUS:
25661 if (CONST_INT_P (value))
25662 {
25663 value = GEN_INT (-INTVAL (value));
25664 code = PLUS;
25665 }
25666 /* FALLTHRU */
25667
25668 case PLUS:
25669 if (mode == DImode)
25670 {
25671 /* DImode plus/minus need to clobber flags. */
25672 /* The adddi3 and subdi3 patterns are incorrectly written so that
25673 they require matching operands, even when we could easily support
25674 three operands. Thankfully, this can be fixed up post-splitting,
25675 as the individual add+adc patterns do accept three operands and
25676 post-reload cprop can make these moves go away. */
25677 emit_move_insn (new_out, old_out);
25678 if (code == PLUS)
25679 x = gen_adddi3 (new_out, new_out, value);
25680 else
25681 x = gen_subdi3 (new_out, new_out, value);
25682 emit_insn (x);
25683 break;
25684 }
25685 /* FALLTHRU */
25686
25687 default:
25688 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
25689 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25690 break;
25691 }
25692
25693 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out));
25694
25695 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25696 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
25697
25698 arm_post_atomic_barrier (model);
25699 }
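/* Roughly, for an SImode atomic fetch-and-add with MEMMODEL_SEQ_CST this
   produces a loop of the following shape (illustrative only):

        dmb     sy
     1: ldrex   old, [mem]
        add     new, old, value
        strex   tmp, new, [mem]
        cmp     tmp, #0
        bne     1b                      @ marked unlikely
        dmb     sy
   */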
25700 \f
25701 #define MAX_VECT_LEN 16
25702
25703 struct expand_vec_perm_d
25704 {
25705 rtx target, op0, op1;
25706 unsigned char perm[MAX_VECT_LEN];
25707 enum machine_mode vmode;
25708 unsigned char nelt;
25709 bool one_vector_p;
25710 bool testing_p;
25711 };
25712
25713 /* Generate a variable permutation. */
25714
25715 static void
25716 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
25717 {
25718 enum machine_mode vmode = GET_MODE (target);
25719 bool one_vector_p = rtx_equal_p (op0, op1);
25720
25721 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
25722 gcc_checking_assert (GET_MODE (op0) == vmode);
25723 gcc_checking_assert (GET_MODE (op1) == vmode);
25724 gcc_checking_assert (GET_MODE (sel) == vmode);
25725 gcc_checking_assert (TARGET_NEON);
25726
25727 if (one_vector_p)
25728 {
25729 if (vmode == V8QImode)
25730 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
25731 else
25732 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
25733 }
25734 else
25735 {
25736 rtx pair;
25737
25738 if (vmode == V8QImode)
25739 {
25740 pair = gen_reg_rtx (V16QImode);
25741 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
25742 pair = gen_lowpart (TImode, pair);
25743 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
25744 }
25745 else
25746 {
25747 pair = gen_reg_rtx (OImode);
25748 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
25749 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
25750 }
25751 }
25752 }
25753
25754 void
25755 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
25756 {
25757 enum machine_mode vmode = GET_MODE (target);
25758 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
25759 bool one_vector_p = rtx_equal_p (op0, op1);
25760 rtx rmask[MAX_VECT_LEN], mask;
25761
25762 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
25763 numbering of elements for big-endian, we must reverse the order. */
25764 gcc_checking_assert (!BYTES_BIG_ENDIAN);
25765
25766 /* The VTBL instruction does not use a modulo index, so we must take care
25767 of that ourselves. */
25768 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
25769 for (i = 0; i < nelt; ++i)
25770 rmask[i] = mask;
25771 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
25772 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
25773
25774 arm_expand_vec_perm_1 (target, op0, op1, sel);
25775 }
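/* For example (illustrative only): permuting two distinct V8QImode operands
   uses mask = 15, so every selector byte is reduced modulo 16 before the
   VTBL2 lookup into the 16-byte table built by vcombine from OP0 and OP1;
   with a single operand the mask is 7 and the lookup uses VTBL1.  */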
25776
25777 /* Generate or test for an insn that supports a constant permutation. */
25778
25779 /* Recognize patterns for the VUZP insns. */
25780
25781 static bool
25782 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
25783 {
25784 unsigned int i, odd, mask, nelt = d->nelt;
25785 rtx out0, out1, in0, in1, x;
25786 rtx (*gen)(rtx, rtx, rtx, rtx);
25787
25788 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25789 return false;
25790
25791 /* Note that these are little-endian tests. Adjust for big-endian later. */
25792 if (d->perm[0] == 0)
25793 odd = 0;
25794 else if (d->perm[0] == 1)
25795 odd = 1;
25796 else
25797 return false;
25798 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25799
25800 for (i = 0; i < nelt; i++)
25801 {
25802 unsigned elt = (i * 2 + odd) & mask;
25803 if (d->perm[i] != elt)
25804 return false;
25805 }
25806
25807 /* Success! */
25808 if (d->testing_p)
25809 return true;
25810
25811 switch (d->vmode)
25812 {
25813 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
25814 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
25815 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
25816 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
25817 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
25818 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
25819 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
25820 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
25821 default:
25822 gcc_unreachable ();
25823 }
25824
25825 in0 = d->op0;
25826 in1 = d->op1;
25827 if (BYTES_BIG_ENDIAN)
25828 {
25829 x = in0, in0 = in1, in1 = x;
25830 odd = !odd;
25831 }
25832
25833 out0 = d->target;
25834 out1 = gen_reg_rtx (d->vmode);
25835 if (odd)
25836 x = out0, out0 = out1, out1 = x;
25837
25838 emit_insn (gen (out0, in0, in1, out1));
25839 return true;
25840 }
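/* For example (illustrative only, little-endian): a V4SImode permutation of
   two operands with d->perm = { 0, 2, 4, 6 } selects the even-indexed
   elements, so ODD is 0 and a single VUZP.32 is emitted; the odd-indexed
   half lands in the scratch register OUT1 and is simply discarded.  */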
25841
25842 /* Recognize patterns for the VZIP insns. */
25843
25844 static bool
25845 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
25846 {
25847 unsigned int i, high, mask, nelt = d->nelt;
25848 rtx out0, out1, in0, in1, x;
25849 rtx (*gen)(rtx, rtx, rtx, rtx);
25850
25851 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25852 return false;
25853
25854 /* Note that these are little-endian tests. Adjust for big-endian later. */
25855 high = nelt / 2;
25856 if (d->perm[0] == high)
25857 ;
25858 else if (d->perm[0] == 0)
25859 high = 0;
25860 else
25861 return false;
25862 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25863
25864 for (i = 0; i < nelt / 2; i++)
25865 {
25866 unsigned elt = (i + high) & mask;
25867 if (d->perm[i * 2] != elt)
25868 return false;
25869 elt = (elt + nelt) & mask;
25870 if (d->perm[i * 2 + 1] != elt)
25871 return false;
25872 }
25873
25874 /* Success! */
25875 if (d->testing_p)
25876 return true;
25877
25878 switch (d->vmode)
25879 {
25880 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
25881 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
25882 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
25883 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
25884 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
25885 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
25886 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
25887 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
25888 default:
25889 gcc_unreachable ();
25890 }
25891
25892 in0 = d->op0;
25893 in1 = d->op1;
25894 if (BYTES_BIG_ENDIAN)
25895 {
25896 x = in0, in0 = in1, in1 = x;
25897 high = !high;
25898 }
25899
25900 out0 = d->target;
25901 out1 = gen_reg_rtx (d->vmode);
25902 if (high)
25903 x = out0, out0 = out1, out1 = x;
25904
25905 emit_insn (gen (out0, in0, in1, out1));
25906 return true;
25907 }
25908
25909 /* Recognize patterns for the VREV insns. */
25910
25911 static bool
25912 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
25913 {
25914 unsigned int i, j, diff, nelt = d->nelt;
25915 rtx (*gen)(rtx, rtx, rtx);
25916
25917 if (!d->one_vector_p)
25918 return false;
25919
25920 diff = d->perm[0];
25921 switch (diff)
25922 {
25923 case 7:
25924 switch (d->vmode)
25925 {
25926 case V16QImode: gen = gen_neon_vrev64v16qi; break;
25927 case V8QImode: gen = gen_neon_vrev64v8qi; break;
25928 default:
25929 return false;
25930 }
25931 break;
25932 case 3:
25933 switch (d->vmode)
25934 {
25935 case V16QImode: gen = gen_neon_vrev32v16qi; break;
25936 case V8QImode: gen = gen_neon_vrev32v8qi; break;
25937 case V8HImode: gen = gen_neon_vrev64v8hi; break;
25938 case V4HImode: gen = gen_neon_vrev64v4hi; break;
25939 default:
25940 return false;
25941 }
25942 break;
25943 case 1:
25944 switch (d->vmode)
25945 {
25946 case V16QImode: gen = gen_neon_vrev16v16qi; break;
25947 case V8QImode: gen = gen_neon_vrev16v8qi; break;
25948 case V8HImode: gen = gen_neon_vrev32v8hi; break;
25949 case V4HImode: gen = gen_neon_vrev32v4hi; break;
25950 case V4SImode: gen = gen_neon_vrev64v4si; break;
25951 case V2SImode: gen = gen_neon_vrev64v2si; break;
25952 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
25953 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
25954 default:
25955 return false;
25956 }
25957 break;
25958 default:
25959 return false;
25960 }
25961
25962 for (i = 0; i < nelt ; i += diff + 1)
25963 for (j = 0; j <= diff; j += 1)
25964 {
25965 /* This is guaranteed to be true as the value of diff
25966 is 7, 3 or 1, and we should have enough elements in the
25967 queue to generate this. Getting a vector mask with a
25968 value of diff other than these values implies that
25969 something is wrong by the time we get here. */
25970 gcc_assert (i + j < nelt);
25971 if (d->perm[i + j] != i + diff - j)
25972 return false;
25973 }
25974
25975 /* Success! */
25976 if (d->testing_p)
25977 return true;
25978
25979 /* ??? The third operand is an artifact of the builtin infrastructure
25980 and is ignored by the actual instruction. */
25981 emit_insn (gen (d->target, d->op0, const0_rtx));
25982 return true;
25983 }
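/* For example (illustrative only): a single-operand V8QImode permutation
   with d->perm = { 3, 2, 1, 0, 7, 6, 5, 4 } has diff = 3 and reverses the
   bytes within each 32-bit group, so it is matched to VREV32.8.  */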
25984
25985 /* Recognize patterns for the VTRN insns. */
25986
25987 static bool
25988 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
25989 {
25990 unsigned int i, odd, mask, nelt = d->nelt;
25991 rtx out0, out1, in0, in1, x;
25992 rtx (*gen)(rtx, rtx, rtx, rtx);
25993
25994 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25995 return false;
25996
25997 /* Note that these are little-endian tests. Adjust for big-endian later. */
25998 if (d->perm[0] == 0)
25999 odd = 0;
26000 else if (d->perm[0] == 1)
26001 odd = 1;
26002 else
26003 return false;
26004 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
26005
26006 for (i = 0; i < nelt; i += 2)
26007 {
26008 if (d->perm[i] != i + odd)
26009 return false;
26010 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
26011 return false;
26012 }
26013
26014 /* Success! */
26015 if (d->testing_p)
26016 return true;
26017
26018 switch (d->vmode)
26019 {
26020 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
26021 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
26022 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
26023 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
26024 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
26025 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
26026 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
26027 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
26028 default:
26029 gcc_unreachable ();
26030 }
26031
26032 in0 = d->op0;
26033 in1 = d->op1;
26034 if (BYTES_BIG_ENDIAN)
26035 {
26036 x = in0, in0 = in1, in1 = x;
26037 odd = !odd;
26038 }
26039
26040 out0 = d->target;
26041 out1 = gen_reg_rtx (d->vmode);
26042 if (odd)
26043 x = out0, out0 = out1, out1 = x;
26044
26045 emit_insn (gen (out0, in0, in1, out1));
26046 return true;
26047 }
26048
26049 /* Recognize patterns for the VEXT insns. */
26050
26051 static bool
26052 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
26053 {
26054 unsigned int i, nelt = d->nelt;
26055 rtx (*gen) (rtx, rtx, rtx, rtx);
26056 rtx offset;
26057
26058 unsigned int location;
26059
26060 unsigned int next = d->perm[0] + 1;
26061
26062 /* TODO: Handle GCC's numbering of elements for big-endian. */
26063 if (BYTES_BIG_ENDIAN)
26064 return false;
26065
26066 /* Check if the extracted indexes are increasing by one. */
26067 for (i = 1; i < nelt; next++, i++)
26068 {
26069 /* If we hit the most significant element of the 2nd vector in
26070 the previous iteration, no need to test further. */
26071 if (next == 2 * nelt)
26072 return false;
26073
26074 /* If we are operating on only one vector, it could be a
26075 rotation. If there are only two elements of size < 64, let
26076 arm_evpc_neon_vrev catch it. */
26077 if (d->one_vector_p && (next == nelt))
26078 {
26079 if ((nelt == 2) && (d->vmode != V2DImode))
26080 return false;
26081 else
26082 next = 0;
26083 }
26084
26085 if (d->perm[i] != next)
26086 return false;
26087 }
26088
26089 location = d->perm[0];
26090
26091 switch (d->vmode)
26092 {
26093 case V16QImode: gen = gen_neon_vextv16qi; break;
26094 case V8QImode: gen = gen_neon_vextv8qi; break;
26095 case V4HImode: gen = gen_neon_vextv4hi; break;
26096 case V8HImode: gen = gen_neon_vextv8hi; break;
26097 case V2SImode: gen = gen_neon_vextv2si; break;
26098 case V4SImode: gen = gen_neon_vextv4si; break;
26099 case V2SFmode: gen = gen_neon_vextv2sf; break;
26100 case V4SFmode: gen = gen_neon_vextv4sf; break;
26101 case V2DImode: gen = gen_neon_vextv2di; break;
26102 default:
26103 return false;
26104 }
26105
26106 /* Success! */
26107 if (d->testing_p)
26108 return true;
26109
26110 offset = GEN_INT (location);
26111 emit_insn (gen (d->target, d->op0, d->op1, offset));
26112 return true;
26113 }
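/* For example (illustrative only, little-endian): a two-operand V4SImode
   permutation with d->perm = { 1, 2, 3, 4 } extracts four consecutive
   elements starting at index 1 of the concatenated operands, so it is
   matched to a VEXT.32 with offset #1.  */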
26114
26115 /* The NEON VTBL instruction is a fully variable permutation that's even
26116 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
26117 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
26118 can do slightly better by expanding this as a constant where we don't
26119 have to apply a mask. */
26120
26121 static bool
26122 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
26123 {
26124 rtx rperm[MAX_VECT_LEN], sel;
26125 enum machine_mode vmode = d->vmode;
26126 unsigned int i, nelt = d->nelt;
26127
26128 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
26129 numbering of elements for big-endian, we must reverse the order. */
26130 if (BYTES_BIG_ENDIAN)
26131 return false;
26132
26133 if (d->testing_p)
26134 return true;
26135
26136 /* Generic code will try constant permutation twice. Once with the
26137 original mode and again with the elements lowered to QImode.
26138 So wait and don't do the selector expansion ourselves. */
26139 if (vmode != V8QImode && vmode != V16QImode)
26140 return false;
26141
26142 for (i = 0; i < nelt; ++i)
26143 rperm[i] = GEN_INT (d->perm[i]);
26144 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
26145 sel = force_reg (vmode, sel);
26146
26147 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
26148 return true;
26149 }
26150
26151 static bool
26152 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
26153 {
26154 /* Check if the input mask matches vext before reordering the
26155 operands. */
26156 if (TARGET_NEON)
26157 if (arm_evpc_neon_vext (d))
26158 return true;
26159
26160 /* The pattern matching functions above are written to look for a small
26161 number to begin the sequence (0, 1, N/2). If we begin with an index
26162 from the second operand, we can swap the operands. */
26163 if (d->perm[0] >= d->nelt)
26164 {
26165 unsigned i, nelt = d->nelt;
26166 rtx x;
26167
26168 for (i = 0; i < nelt; ++i)
26169 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
26170
26171 x = d->op0;
26172 d->op0 = d->op1;
26173 d->op1 = x;
26174 }
26175
26176 if (TARGET_NEON)
26177 {
26178 if (arm_evpc_neon_vuzp (d))
26179 return true;
26180 if (arm_evpc_neon_vzip (d))
26181 return true;
26182 if (arm_evpc_neon_vrev (d))
26183 return true;
26184 if (arm_evpc_neon_vtrn (d))
26185 return true;
26186 return arm_evpc_neon_vtbl (d);
26187 }
26188 return false;
26189 }
26190
26191 /* Expand a vec_perm_const pattern. */
26192
26193 bool
26194 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
26195 {
26196 struct expand_vec_perm_d d;
26197 int i, nelt, which;
26198
26199 d.target = target;
26200 d.op0 = op0;
26201 d.op1 = op1;
26202
26203 d.vmode = GET_MODE (target);
26204 gcc_assert (VECTOR_MODE_P (d.vmode));
26205 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
26206 d.testing_p = false;
26207
26208 for (i = which = 0; i < nelt; ++i)
26209 {
26210 rtx e = XVECEXP (sel, 0, i);
26211 int ei = INTVAL (e) & (2 * nelt - 1);
26212 which |= (ei < nelt ? 1 : 2);
26213 d.perm[i] = ei;
26214 }
26215
26216 switch (which)
26217 {
26218 default:
26219 gcc_unreachable();
26220
26221 case 3:
26222 d.one_vector_p = false;
26223 if (!rtx_equal_p (op0, op1))
26224 break;
26225
26226 /* The elements of PERM do not suggest that only the first operand
26227 is used, but both operands are identical. Allow easier matching
26228 of the permutation by folding the permutation into the single
26229 input vector. */
26230 /* FALLTHRU */
26231 case 2:
26232 for (i = 0; i < nelt; ++i)
26233 d.perm[i] &= nelt - 1;
26234 d.op0 = op1;
26235 d.one_vector_p = true;
26236 break;
26237
26238 case 1:
26239 d.op1 = op0;
26240 d.one_vector_p = true;
26241 break;
26242 }
26243
26244 return arm_expand_vec_perm_const_1 (&d);
26245 }
26246
26247 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
26248
26249 static bool
26250 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
26251 const unsigned char *sel)
26252 {
26253 struct expand_vec_perm_d d;
26254 unsigned int i, nelt, which;
26255 bool ret;
26256
26257 d.vmode = vmode;
26258 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
26259 d.testing_p = true;
26260 memcpy (d.perm, sel, nelt);
26261
26262 /* Categorize the set of elements in the selector. */
26263 for (i = which = 0; i < nelt; ++i)
26264 {
26265 unsigned char e = d.perm[i];
26266 gcc_assert (e < 2 * nelt);
26267 which |= (e < nelt ? 1 : 2);
26268 }
26269
26270 /* For all elements from second vector, fold the elements to first. */
26271 if (which == 2)
26272 for (i = 0; i < nelt; ++i)
26273 d.perm[i] -= nelt;
26274
26275 /* Check whether the mask can be applied to the vector type. */
26276 d.one_vector_p = (which != 3);
26277
26278 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
26279 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
26280 if (!d.one_vector_p)
26281 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
26282
26283 start_sequence ();
26284 ret = arm_expand_vec_perm_const_1 (&d);
26285 end_sequence ();
26286
26287 return ret;
26288 }
26289
26290 bool
26291 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
26292 {
26293 /* If we are soft float, and either have LDRD or the access fits in a
26294 single word, then all auto increment forms are ok. */
26295 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
26296 return true;
26297
26298 switch (code)
26299 {
26300 /* Post-increment and pre-decrement are supported for all
26301 instruction forms except for vector forms. */
26302 case ARM_POST_INC:
26303 case ARM_PRE_DEC:
26304 if (VECTOR_MODE_P (mode))
26305 {
26306 if (code != ARM_PRE_DEC)
26307 return true;
26308 else
26309 return false;
26310 }
26311
26312 return true;
26313
26314 case ARM_POST_DEC:
26315 case ARM_PRE_INC:
26316 /* Without LDRD, and with a mode wider than a
26317 word, there is no point in auto-incrementing
26318 because ldm and stm will not have these forms. */
26319 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
26320 return false;
26321
26322 /* Vector and floating point modes do not support
26323 these auto increment forms. */
26324 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
26325 return false;
26326
26327 return true;
26328
26329 default:
26330 return false;
26331
26332 }
26333
26334 return false;
26335 }
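/* For example (illustrative only): ARM_POST_INC is accepted for SImode,
   matching forms such as "ldr r0, [r1], #4", while ARM_PRE_INC for DImode
   is rejected unless LDRD is available, since a plain LDM/STM cannot
   express that addressing mode.  */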
26336
26337 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
26338 on ARM, since we know that shifts by negative amounts are no-ops.
26339 Additionally, the default expansion code is not available or suitable
26340 for post-reload insn splits (this can occur when the register allocator
26341 chooses not to do a shift in NEON).
26342
26343 This function is used in both initial expand and post-reload splits, and
26344 handles all kinds of 64-bit shifts.
26345
26346 Input requirements:
26347 - It is safe for the input and output to be the same register, but
26348 early-clobber rules apply for the shift amount and scratch registers.
26349 - Shift by register requires both scratch registers. Shift by a constant
26350 less than 32 in Thumb2 mode requires SCRATCH1 only. In all other cases
26351 the scratch registers may be NULL.
26352 - Ashiftrt by a register also clobbers the CC register. */
26353 void
26354 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
26355 rtx amount, rtx scratch1, rtx scratch2)
26356 {
26357 rtx out_high = gen_highpart (SImode, out);
26358 rtx out_low = gen_lowpart (SImode, out);
26359 rtx in_high = gen_highpart (SImode, in);
26360 rtx in_low = gen_lowpart (SImode, in);
26361
26362 /* Terminology:
26363 in = the register pair containing the input value.
26364 out = the destination register pair.
26365 up = the high- or low-part of each pair.
26366 down = the opposite part to "up".
26367 In a shift, we can consider bits to shift from "up"-stream to
26368 "down"-stream, so in a left-shift "up" is the low-part and "down"
26369 is the high-part of each register pair. */
26370
26371 rtx out_up = code == ASHIFT ? out_low : out_high;
26372 rtx out_down = code == ASHIFT ? out_high : out_low;
26373 rtx in_up = code == ASHIFT ? in_low : in_high;
26374 rtx in_down = code == ASHIFT ? in_high : in_low;
26375
26376 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
26377 gcc_assert (out
26378 && (REG_P (out) || GET_CODE (out) == SUBREG)
26379 && GET_MODE (out) == DImode);
26380 gcc_assert (in
26381 && (REG_P (in) || GET_CODE (in) == SUBREG)
26382 && GET_MODE (in) == DImode);
26383 gcc_assert (amount
26384 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
26385 && GET_MODE (amount) == SImode)
26386 || CONST_INT_P (amount)));
26387 gcc_assert (scratch1 == NULL
26388 || (GET_CODE (scratch1) == SCRATCH)
26389 || (GET_MODE (scratch1) == SImode
26390 && REG_P (scratch1)));
26391 gcc_assert (scratch2 == NULL
26392 || (GET_CODE (scratch2) == SCRATCH)
26393 || (GET_MODE (scratch2) == SImode
26394 && REG_P (scratch2)));
26395 gcc_assert (!REG_P (out) || !REG_P (amount)
26396 || !HARD_REGISTER_P (out)
26397 || (REGNO (out) != REGNO (amount)
26398 && REGNO (out) + 1 != REGNO (amount)));
26399
26400 /* Macros to make following code more readable. */
26401 #define SUB_32(DEST,SRC) \
26402 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
26403 #define RSB_32(DEST,SRC) \
26404 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
26405 #define SUB_S_32(DEST,SRC) \
26406 gen_addsi3_compare0 ((DEST), (SRC), \
26407 GEN_INT (-32))
26408 #define SET(DEST,SRC) \
26409 gen_rtx_SET (SImode, (DEST), (SRC))
26410 #define SHIFT(CODE,SRC,AMOUNT) \
26411 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
26412 #define LSHIFT(CODE,SRC,AMOUNT) \
26413 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
26414 SImode, (SRC), (AMOUNT))
26415 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
26416 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
26417 SImode, (SRC), (AMOUNT))
26418 #define ORR(A,B) \
26419 gen_rtx_IOR (SImode, (A), (B))
26420 #define BRANCH(COND,LABEL) \
26421 gen_arm_cond_branch ((LABEL), \
26422 gen_rtx_ ## COND (CCmode, cc_reg, \
26423 const0_rtx), \
26424 cc_reg)
26425
26426 /* Shifts by register and shifts by constant are handled separately. */
26427 if (CONST_INT_P (amount))
26428 {
26429 /* We have a shift-by-constant. */
26430
26431 /* First, handle out-of-range shift amounts.
26432 In both cases we try to match the result that an ARM instruction in a
26433 shift-by-register would give. This helps reduce execution
26434 differences between optimization levels, but it won't stop other
26435 parts of the compiler doing different things. This is "undefined
26436 behaviour", in any case. */
26437 if (INTVAL (amount) <= 0)
26438 emit_insn (gen_movdi (out, in));
26439 else if (INTVAL (amount) >= 64)
26440 {
26441 if (code == ASHIFTRT)
26442 {
26443 rtx const31_rtx = GEN_INT (31);
26444 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
26445 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
26446 }
26447 else
26448 emit_insn (gen_movdi (out, const0_rtx));
26449 }
26450
26451 /* Now handle valid shifts. */
26452 else if (INTVAL (amount) < 32)
26453 {
26454 /* Shifts by a constant less than 32. */
26455 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
26456
26457 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
26458 emit_insn (SET (out_down,
26459 ORR (REV_LSHIFT (code, in_up, reverse_amount),
26460 out_down)));
26461 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
26462 }
26463 else
26464 {
26465 /* Shifts by a constant greater than 31. */
26466 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
26467
26468 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
26469 if (code == ASHIFTRT)
26470 emit_insn (gen_ashrsi3 (out_up, in_up,
26471 GEN_INT (31)));
26472 else
26473 emit_insn (SET (out_up, const0_rtx));
26474 }
26475 }
26476 else
26477 {
26478 /* We have a shift-by-register. */
26479 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
26480
26481 /* This alternative requires the scratch registers. */
26482 gcc_assert (scratch1 && REG_P (scratch1));
26483 gcc_assert (scratch2 && REG_P (scratch2));
26484
26485 /* We will need the values "amount-32" and "32-amount" later.
26486 Swapping them around now allows the later code to be more general. */
26487 switch (code)
26488 {
26489 case ASHIFT:
26490 emit_insn (SUB_32 (scratch1, amount));
26491 emit_insn (RSB_32 (scratch2, amount));
26492 break;
26493 case ASHIFTRT:
26494 emit_insn (RSB_32 (scratch1, amount));
26495 /* Also set CC = amount > 32. */
26496 emit_insn (SUB_S_32 (scratch2, amount));
26497 break;
26498 case LSHIFTRT:
26499 emit_insn (RSB_32 (scratch1, amount));
26500 emit_insn (SUB_32 (scratch2, amount));
26501 break;
26502 default:
26503 gcc_unreachable ();
26504 }
26505
26506 /* Emit code like this:
26507
26508 arithmetic-left:
26509 out_down = in_down << amount;
26510 out_down = (in_up << (amount - 32)) | out_down;
26511 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
26512 out_up = in_up << amount;
26513
26514 arithmetic-right:
26515 out_down = in_down >> amount;
26516 out_down = (in_up << (32 - amount)) | out_down;
26517 if (amount < 32)
26518 out_down = ((signed)in_up >> (amount - 32)) | out_down;
26519 out_up = in_up << amount;
26520
26521 logical-right:
26522 out_down = in_down >> amount;
26523 out_down = (in_up << (32 - amount)) | out_down;
26524 if (amount < 32)
26525 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
26526 out_up = in_up << amount;
26527
26528 The ARM and Thumb2 variants are the same but implemented slightly
26529 differently. If this were only called during expand we could just
26530 use the Thumb2 case and let combine do the right thing, but this
26531 can also be called from post-reload splitters. */
26532
26533 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
26534
26535 if (!TARGET_THUMB2)
26536 {
26537 /* Emit code for ARM mode. */
26538 emit_insn (SET (out_down,
26539 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
26540 if (code == ASHIFTRT)
26541 {
26542 rtx done_label = gen_label_rtx ();
26543 emit_jump_insn (BRANCH (LT, done_label));
26544 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
26545 out_down)));
26546 emit_label (done_label);
26547 }
26548 else
26549 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
26550 out_down)));
26551 }
26552 else
26553 {
26554 /* Emit code for Thumb2 mode.
26555 Thumb2 can't do shift and or in one insn. */
26556 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
26557 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
26558
26559 if (code == ASHIFTRT)
26560 {
26561 rtx done_label = gen_label_rtx ();
26562 emit_jump_insn (BRANCH (LT, done_label));
26563 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
26564 emit_insn (SET (out_down, ORR (out_down, scratch2)));
26565 emit_label (done_label);
26566 }
26567 else
26568 {
26569 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
26570 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
26571 }
26572 }
26573
26574 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
26575 }
26576
26577 #undef SUB_32
26578 #undef RSB_32
26579 #undef SUB_S_32
26580 #undef SET
26581 #undef SHIFT
26582 #undef LSHIFT
26583 #undef REV_LSHIFT
26584 #undef ORR
26585 #undef BRANCH
26586 }
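/* A minimal host-side sketch (illustrative only, not part of the backend)
   of the constant-amount decomposition used above for a 64-bit left shift
   when 0 < AMOUNT < 32: the high output word combines both input words,
   while the low output word depends only on the low input word.  The
   function name is hypothetical and assumes 32-bit unsigned int.  */

static unsigned long long
shift_left_64_by_parts (unsigned int in_low, unsigned int in_high,
                        unsigned int amount)
{
  unsigned int out_low = in_low << amount;
  unsigned int out_high = (in_high << amount) | (in_low >> (32 - amount));

  return ((unsigned long long) out_high << 32) | out_low;
}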
26587
26588
26589 /* Return true if *COMPARISON is a comparison operation that the target
26590 can handle, and force the operands into a form that is valid for it. */
26591 bool
26592 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
26593 {
26594 enum rtx_code code = GET_CODE (*comparison);
26595 enum rtx_code canonical_code;
26596 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
26597 ? GET_MODE (*op2) : GET_MODE (*op1);
26598
26599 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
26600
26601 if (code == UNEQ || code == LTGT)
26602 return false;
26603
26604 canonical_code = arm_canonicalize_comparison (code, op1, op2);
26605 PUT_CODE (*comparison, canonical_code);
26606
26607 switch (mode)
26608 {
26609 case SImode:
26610 if (!arm_add_operand (*op1, mode))
26611 *op1 = force_reg (mode, *op1);
26612 if (!arm_add_operand (*op2, mode))
26613 *op2 = force_reg (mode, *op2);
26614 return true;
26615
26616 case DImode:
26617 if (!cmpdi_operand (*op1, mode))
26618 *op1 = force_reg (mode, *op1);
26619 if (!cmpdi_operand (*op2, mode))
26620 *op2 = force_reg (mode, *op2);
26621 return true;
26622
26623 case SFmode:
26624 case DFmode:
26625 if (!arm_float_compare_operand (*op1, mode))
26626 *op1 = force_reg (mode, *op1);
26627 if (!arm_float_compare_operand (*op2, mode))
26628 *op2 = force_reg (mode, *op2);
26629 return true;
26630 default:
26631 break;
26632 }
26633
26634 return false;
26635
26636 }
26637
26638 #include "gt-arm.h"