1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
8
9 This file is part of GCC.
10
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
15
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "flags.h"
39 #include "reload.h"
40 #include "function.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "diagnostic-core.h"
44 #include "recog.h"
45 #include "cgraph.h"
46 #include "ggc.h"
47 #include "except.h"
48 #include "tm_p.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "debug.h"
52 #include "langhooks.h"
53 #include "df.h"
54 #include "intl.h"
55 #include "libfuncs.h"
56 #include "params.h"
57 #include "opts.h"
58 #include "dumpfile.h"
59
60 /* Forward definitions of types. */
61 typedef struct minipool_node Mnode;
62 typedef struct minipool_fixup Mfix;
63
64 void (*arm_lang_output_object_attributes_hook)(void);
65
66 struct four_ints
67 {
68 int i[4];
69 };
70
71 /* Forward function declarations. */
72 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
73 static int arm_compute_static_chain_stack_bytes (void);
74 static arm_stack_offsets *arm_get_frame_offsets (void);
75 static void arm_add_gc_roots (void);
76 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
77 HOST_WIDE_INT, rtx, rtx, int, int);
78 static unsigned bit_count (unsigned long);
79 static int arm_address_register_rtx_p (rtx, int);
80 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
81 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
82 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
83 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
84 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
85 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
86 inline static int thumb1_index_register_rtx_p (rtx, int);
87 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
88 static int thumb_far_jump_used_p (void);
89 static bool thumb_force_lr_save (void);
90 static unsigned arm_size_return_regs (void);
91 static bool arm_assemble_integer (rtx, unsigned int, int);
92 static void arm_print_operand (FILE *, rtx, int);
93 static void arm_print_operand_address (FILE *, rtx);
94 static bool arm_print_operand_punct_valid_p (unsigned char code);
95 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
96 static arm_cc get_arm_condition_code (rtx);
97 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
98 static rtx is_jump_table (rtx);
99 static const char *output_multi_immediate (rtx *, const char *, const char *,
100 int, HOST_WIDE_INT);
101 static const char *shift_op (rtx, HOST_WIDE_INT *);
102 static struct machine_function *arm_init_machine_status (void);
103 static void thumb_exit (FILE *, int);
104 static rtx is_jump_table (rtx);
105 static HOST_WIDE_INT get_jump_table_size (rtx);
106 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
107 static Mnode *add_minipool_forward_ref (Mfix *);
108 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
109 static Mnode *add_minipool_backward_ref (Mfix *);
110 static void assign_minipool_offsets (Mfix *);
111 static void arm_print_value (FILE *, rtx);
112 static void dump_minipool (rtx);
113 static int arm_barrier_cost (rtx);
114 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
115 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
116 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
117 rtx);
118 static void arm_reorg (void);
119 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
120 static unsigned long arm_compute_save_reg0_reg12_mask (void);
121 static unsigned long arm_compute_save_reg_mask (void);
122 static unsigned long arm_isr_value (tree);
123 static unsigned long arm_compute_func_type (void);
124 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
125 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
126 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
127 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
128 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
129 #endif
130 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
131 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
132 static int arm_comp_type_attributes (const_tree, const_tree);
133 static void arm_set_default_type_attributes (tree);
134 static int arm_adjust_cost (rtx, rtx, rtx, int);
135 static int optimal_immediate_sequence (enum rtx_code code,
136 unsigned HOST_WIDE_INT val,
137 struct four_ints *return_sequence);
138 static int optimal_immediate_sequence_1 (enum rtx_code code,
139 unsigned HOST_WIDE_INT val,
140 struct four_ints *return_sequence,
141 int i);
142 static int arm_get_strip_length (int);
143 static bool arm_function_ok_for_sibcall (tree, tree);
144 static enum machine_mode arm_promote_function_mode (const_tree,
145 enum machine_mode, int *,
146 const_tree, int);
147 static bool arm_return_in_memory (const_tree, const_tree);
148 static rtx arm_function_value (const_tree, const_tree, bool);
149 static rtx arm_libcall_value_1 (enum machine_mode);
150 static rtx arm_libcall_value (enum machine_mode, const_rtx);
151 static bool arm_function_value_regno_p (const unsigned int);
152 static void arm_internal_label (FILE *, const char *, unsigned long);
153 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
154 tree);
155 static bool arm_have_conditional_execution (void);
156 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
157 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
158 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
159 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
160 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
161 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
162 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
163 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
164 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
165 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
166 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
167 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
168 static void arm_init_builtins (void);
169 static void arm_init_iwmmxt_builtins (void);
170 static rtx safe_vector_operand (rtx, enum machine_mode);
171 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
172 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
173 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
174 static tree arm_builtin_decl (unsigned, bool);
175 static void emit_constant_insn (rtx cond, rtx pattern);
176 static rtx emit_set_insn (rtx, rtx);
177 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
178 tree, bool);
179 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
180 const_tree, bool);
181 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
182 const_tree, bool);
183 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
184 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
185 const_tree);
186 static rtx aapcs_libcall_value (enum machine_mode);
187 static int aapcs_select_return_coproc (const_tree, const_tree);
188
189 #ifdef OBJECT_FORMAT_ELF
190 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
191 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
192 #endif
193 #ifndef ARM_PE
194 static void arm_encode_section_info (tree, rtx, int);
195 #endif
196
197 static void arm_file_end (void);
198 static void arm_file_start (void);
199
200 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
201 tree, int *, int);
202 static bool arm_pass_by_reference (cumulative_args_t,
203 enum machine_mode, const_tree, bool);
204 static bool arm_promote_prototypes (const_tree);
205 static bool arm_default_short_enums (void);
206 static bool arm_align_anon_bitfield (void);
207 static bool arm_return_in_msb (const_tree);
208 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
209 static bool arm_return_in_memory (const_tree, const_tree);
210 #if ARM_UNWIND_INFO
211 static void arm_unwind_emit (FILE *, rtx);
212 static bool arm_output_ttype (rtx);
213 static void arm_asm_emit_except_personality (rtx);
214 static void arm_asm_init_sections (void);
215 #endif
216 static rtx arm_dwarf_register_span (rtx);
217
218 static tree arm_cxx_guard_type (void);
219 static bool arm_cxx_guard_mask_bit (void);
220 static tree arm_get_cookie_size (tree);
221 static bool arm_cookie_has_size (void);
222 static bool arm_cxx_cdtor_returns_this (void);
223 static bool arm_cxx_key_method_may_be_inline (void);
224 static void arm_cxx_determine_class_data_visibility (tree);
225 static bool arm_cxx_class_data_always_comdat (void);
226 static bool arm_cxx_use_aeabi_atexit (void);
227 static void arm_init_libfuncs (void);
228 static tree arm_build_builtin_va_list (void);
229 static void arm_expand_builtin_va_start (tree, rtx);
230 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
231 static void arm_option_override (void);
232 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
233 static bool arm_cannot_copy_insn_p (rtx);
234 static bool arm_tls_symbol_p (rtx x);
235 static int arm_issue_rate (void);
236 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
237 static bool arm_output_addr_const_extra (FILE *, rtx);
238 static bool arm_allocate_stack_slots_for_args (void);
239 static bool arm_warn_func_return (tree);
240 static const char *arm_invalid_parameter_type (const_tree t);
241 static const char *arm_invalid_return_type (const_tree t);
242 static tree arm_promoted_type (const_tree t);
243 static tree arm_convert_to_type (tree type, tree expr);
244 static bool arm_scalar_mode_supported_p (enum machine_mode);
245 static bool arm_frame_pointer_required (void);
246 static bool arm_can_eliminate (const int, const int);
247 static void arm_asm_trampoline_template (FILE *);
248 static void arm_trampoline_init (rtx, tree, rtx);
249 static rtx arm_trampoline_adjust_address (rtx);
250 static rtx arm_pic_static_addr (rtx orig, rtx reg);
251 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
252 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
253 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
254 static bool arm_array_mode_supported_p (enum machine_mode,
255 unsigned HOST_WIDE_INT);
256 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
257 static bool arm_class_likely_spilled_p (reg_class_t);
258 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
259 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
260 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
261 const_tree type,
262 int misalignment,
263 bool is_packed);
264 static void arm_conditional_register_usage (void);
265 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
266 static unsigned int arm_autovectorize_vector_sizes (void);
267 static int arm_default_branch_cost (bool, bool);
268 static int arm_cortex_a5_branch_cost (bool, bool);
269
270 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
271 const unsigned char *sel);
272
273 \f
274 /* Table of machine attributes. */
275 static const struct attribute_spec arm_attribute_table[] =
276 {
277 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
278 affects_type_identity } */
279 /* Function calls made to this symbol must be done indirectly, because
280 it may lie outside of the 26 bit addressing range of a normal function
281 call. */
282 { "long_call", 0, 0, false, true, true, NULL, false },
283 /* These functions, by contrast, are always known to reside within the 26 bit
284 addressing range. */
285 { "short_call", 0, 0, false, true, true, NULL, false },
286 /* Specify the procedure call conventions for a function. */
287 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
288 false },
289 /* Interrupt Service Routines have special prologue and epilogue requirements. */
290 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
291 false },
292 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
293 false },
294 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
295 false },
296 #ifdef ARM_PE
297 /* ARM/PE has three new attributes:
298 interfacearm - ?
299 dllexport - for exporting a function/variable that will live in a dll
300 dllimport - for importing a function/variable from a dll
301
302 Microsoft allows multiple declspecs in one __declspec, separating
303 them with spaces. We do NOT support this. Instead, use __declspec
304 multiple times.
305 */
306 { "dllimport", 0, 0, true, false, false, NULL, false },
307 { "dllexport", 0, 0, true, false, false, NULL, false },
308 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
309 false },
310 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
311 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
312 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
313 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
314 false },
315 #endif
316 { NULL, 0, 0, false, false, false, NULL, false }
317 };
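/* Illustrative usage (editorial sketch, not from the original source): at
   the C level the attributes in this table are written as, for example,

     void far_func (void) __attribute__ ((long_call));
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     double vfp_add (double, double) __attribute__ ((pcs ("aapcs-vfp")));
     void stub (void) __attribute__ ((naked));

   "long_call" forces an indirect call sequence, "isr"/"interrupt" request
   the special ISR prologue/epilogue via arm_handle_isr_attribute, "pcs"
   overrides the procedure call standard for one function, and "naked"
   suppresses prologue/epilogue generation.  */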
318 \f
319 /* Initialize the GCC target structure. */
320 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
321 #undef TARGET_MERGE_DECL_ATTRIBUTES
322 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
323 #endif
324
325 #undef TARGET_LEGITIMIZE_ADDRESS
326 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
327
328 #undef TARGET_ATTRIBUTE_TABLE
329 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
330
331 #undef TARGET_ASM_FILE_START
332 #define TARGET_ASM_FILE_START arm_file_start
333 #undef TARGET_ASM_FILE_END
334 #define TARGET_ASM_FILE_END arm_file_end
335
336 #undef TARGET_ASM_ALIGNED_SI_OP
337 #define TARGET_ASM_ALIGNED_SI_OP NULL
338 #undef TARGET_ASM_INTEGER
339 #define TARGET_ASM_INTEGER arm_assemble_integer
340
341 #undef TARGET_PRINT_OPERAND
342 #define TARGET_PRINT_OPERAND arm_print_operand
343 #undef TARGET_PRINT_OPERAND_ADDRESS
344 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
345 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
346 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
347
348 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
349 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
350
351 #undef TARGET_ASM_FUNCTION_PROLOGUE
352 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
353
354 #undef TARGET_ASM_FUNCTION_EPILOGUE
355 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
356
357 #undef TARGET_OPTION_OVERRIDE
358 #define TARGET_OPTION_OVERRIDE arm_option_override
359
360 #undef TARGET_COMP_TYPE_ATTRIBUTES
361 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
362
363 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
364 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
365
366 #undef TARGET_SCHED_ADJUST_COST
367 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
368
369 #undef TARGET_REGISTER_MOVE_COST
370 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
371
372 #undef TARGET_MEMORY_MOVE_COST
373 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
374
375 #undef TARGET_ENCODE_SECTION_INFO
376 #ifdef ARM_PE
377 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
378 #else
379 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
380 #endif
381
382 #undef TARGET_STRIP_NAME_ENCODING
383 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
384
385 #undef TARGET_ASM_INTERNAL_LABEL
386 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
387
388 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
389 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
390
391 #undef TARGET_FUNCTION_VALUE
392 #define TARGET_FUNCTION_VALUE arm_function_value
393
394 #undef TARGET_LIBCALL_VALUE
395 #define TARGET_LIBCALL_VALUE arm_libcall_value
396
397 #undef TARGET_FUNCTION_VALUE_REGNO_P
398 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
399
400 #undef TARGET_ASM_OUTPUT_MI_THUNK
401 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
402 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
403 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
404
405 #undef TARGET_RTX_COSTS
406 #define TARGET_RTX_COSTS arm_rtx_costs
407 #undef TARGET_ADDRESS_COST
408 #define TARGET_ADDRESS_COST arm_address_cost
409
410 #undef TARGET_SHIFT_TRUNCATION_MASK
411 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
412 #undef TARGET_VECTOR_MODE_SUPPORTED_P
413 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
414 #undef TARGET_ARRAY_MODE_SUPPORTED_P
415 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
416 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
417 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
418 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
419 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
420 arm_autovectorize_vector_sizes
421
422 #undef TARGET_MACHINE_DEPENDENT_REORG
423 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
424
425 #undef TARGET_INIT_BUILTINS
426 #define TARGET_INIT_BUILTINS arm_init_builtins
427 #undef TARGET_EXPAND_BUILTIN
428 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
429 #undef TARGET_BUILTIN_DECL
430 #define TARGET_BUILTIN_DECL arm_builtin_decl
431
432 #undef TARGET_INIT_LIBFUNCS
433 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
434
435 #undef TARGET_PROMOTE_FUNCTION_MODE
436 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
437 #undef TARGET_PROMOTE_PROTOTYPES
438 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
439 #undef TARGET_PASS_BY_REFERENCE
440 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
441 #undef TARGET_ARG_PARTIAL_BYTES
442 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
443 #undef TARGET_FUNCTION_ARG
444 #define TARGET_FUNCTION_ARG arm_function_arg
445 #undef TARGET_FUNCTION_ARG_ADVANCE
446 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
447 #undef TARGET_FUNCTION_ARG_BOUNDARY
448 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
449
450 #undef TARGET_SETUP_INCOMING_VARARGS
451 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
452
453 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
454 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
455
456 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
457 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
458 #undef TARGET_TRAMPOLINE_INIT
459 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
460 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
461 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
462
463 #undef TARGET_WARN_FUNC_RETURN
464 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
465
466 #undef TARGET_DEFAULT_SHORT_ENUMS
467 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
468
469 #undef TARGET_ALIGN_ANON_BITFIELD
470 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
471
472 #undef TARGET_NARROW_VOLATILE_BITFIELD
473 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
474
475 #undef TARGET_CXX_GUARD_TYPE
476 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
477
478 #undef TARGET_CXX_GUARD_MASK_BIT
479 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
480
481 #undef TARGET_CXX_GET_COOKIE_SIZE
482 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
483
484 #undef TARGET_CXX_COOKIE_HAS_SIZE
485 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
486
487 #undef TARGET_CXX_CDTOR_RETURNS_THIS
488 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
489
490 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
491 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
492
493 #undef TARGET_CXX_USE_AEABI_ATEXIT
494 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
495
496 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
497 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
498 arm_cxx_determine_class_data_visibility
499
500 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
501 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
502
503 #undef TARGET_RETURN_IN_MSB
504 #define TARGET_RETURN_IN_MSB arm_return_in_msb
505
506 #undef TARGET_RETURN_IN_MEMORY
507 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
508
509 #undef TARGET_MUST_PASS_IN_STACK
510 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
511
512 #if ARM_UNWIND_INFO
513 #undef TARGET_ASM_UNWIND_EMIT
514 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
515
516 /* EABI unwinding tables use a different format for the typeinfo tables. */
517 #undef TARGET_ASM_TTYPE
518 #define TARGET_ASM_TTYPE arm_output_ttype
519
520 #undef TARGET_ARM_EABI_UNWINDER
521 #define TARGET_ARM_EABI_UNWINDER true
522
523 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
524 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
525
526 #undef TARGET_ASM_INIT_SECTIONS
527 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
528 #endif /* ARM_UNWIND_INFO */
529
530 #undef TARGET_DWARF_REGISTER_SPAN
531 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
532
533 #undef TARGET_CANNOT_COPY_INSN_P
534 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
535
536 #ifdef HAVE_AS_TLS
537 #undef TARGET_HAVE_TLS
538 #define TARGET_HAVE_TLS true
539 #endif
540
541 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
542 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
543
544 #undef TARGET_LEGITIMATE_CONSTANT_P
545 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
546
547 #undef TARGET_CANNOT_FORCE_CONST_MEM
548 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
549
550 #undef TARGET_MAX_ANCHOR_OFFSET
551 #define TARGET_MAX_ANCHOR_OFFSET 4095
552
553 /* The minimum is set such that the total size of the block
554 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
555 divisible by eight, ensuring natural spacing of anchors. */
556 #undef TARGET_MIN_ANCHOR_OFFSET
557 #define TARGET_MIN_ANCHOR_OFFSET -4088
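/* Worked example (editorial note): with the two limits above a section
   anchor covers byte offsets in the range [-4088, +4095], i.e.
   4088 + 1 + 4095 = 8184 bytes = 1023 * 8, so successive anchors can be
   laid out a whole number of doublewords apart.  */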
558
559 #undef TARGET_SCHED_ISSUE_RATE
560 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
561
562 #undef TARGET_MANGLE_TYPE
563 #define TARGET_MANGLE_TYPE arm_mangle_type
564
565 #undef TARGET_BUILD_BUILTIN_VA_LIST
566 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
567 #undef TARGET_EXPAND_BUILTIN_VA_START
568 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
569 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
570 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
571
572 #ifdef HAVE_AS_TLS
573 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
574 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
575 #endif
576
577 #undef TARGET_LEGITIMATE_ADDRESS_P
578 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
579
580 #undef TARGET_PREFERRED_RELOAD_CLASS
581 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
582
583 #undef TARGET_INVALID_PARAMETER_TYPE
584 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
585
586 #undef TARGET_INVALID_RETURN_TYPE
587 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
588
589 #undef TARGET_PROMOTED_TYPE
590 #define TARGET_PROMOTED_TYPE arm_promoted_type
591
592 #undef TARGET_CONVERT_TO_TYPE
593 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
594
595 #undef TARGET_SCALAR_MODE_SUPPORTED_P
596 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
597
598 #undef TARGET_FRAME_POINTER_REQUIRED
599 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
600
601 #undef TARGET_CAN_ELIMINATE
602 #define TARGET_CAN_ELIMINATE arm_can_eliminate
603
604 #undef TARGET_CONDITIONAL_REGISTER_USAGE
605 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
606
607 #undef TARGET_CLASS_LIKELY_SPILLED_P
608 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
609
610 #undef TARGET_VECTOR_ALIGNMENT
611 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
612
613 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
614 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
615 arm_vector_alignment_reachable
616
617 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
618 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
619 arm_builtin_support_vector_misalignment
620
621 #undef TARGET_PREFERRED_RENAME_CLASS
622 #define TARGET_PREFERRED_RENAME_CLASS \
623 arm_preferred_rename_class
624
625 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
626 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
627 arm_vectorize_vec_perm_const_ok
628
629 struct gcc_target targetm = TARGET_INITIALIZER;
630 \f
631 /* Obstack for minipool constant handling. */
632 static struct obstack minipool_obstack;
633 static char * minipool_startobj;
634
635 /* The maximum number of insns skipped which
636 will be conditionalised if possible. */
637 static int max_insns_skipped = 5;
638
639 extern FILE * asm_out_file;
640
641 /* True if we are currently building a constant table. */
642 int making_const_table;
643
644 /* The processor for which instructions should be scheduled. */
645 enum processor_type arm_tune = arm_none;
646
647 /* The current tuning set. */
648 const struct tune_params *current_tune;
649
650 /* Which floating point hardware to schedule for. */
651 int arm_fpu_attr;
652
653 /* Which floating point hardware to use. */
654 const struct arm_fpu_desc *arm_fpu_desc;
655
656 /* Used for Thumb call_via trampolines. */
657 rtx thumb_call_via_label[14];
658 static int thumb_call_reg_needed;
659
660 /* Bit values used to identify processor capabilities. */
661 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
662 #define FL_ARCH3M (1 << 1) /* Extended multiply */
663 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
664 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
665 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
666 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
667 #define FL_THUMB (1 << 6) /* Thumb aware */
668 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
669 #define FL_STRONG (1 << 8) /* StrongARM */
670 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
671 #define FL_XSCALE (1 << 10) /* XScale */
672 /* spare (1 << 11) */
673 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
674 media instructions. */
675 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
676 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
677 Note: ARM6 & 7 derivatives only. */
678 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
679 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
680 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
681 profile. */
682 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
683 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
684 #define FL_NEON (1 << 20) /* Neon instructions. */
685 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
686 architecture. */
687 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
688 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
689
690 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
691 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
692
693 /* Flags that only affect tuning, not available instructions. */
694 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
695 | FL_CO_PROC)
696
697 #define FL_FOR_ARCH2 FL_NOTM
698 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
699 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
700 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
701 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
702 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
703 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
704 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
705 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
706 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
707 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
708 #define FL_FOR_ARCH6J FL_FOR_ARCH6
709 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
710 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
711 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
712 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
713 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
714 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
715 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
716 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
717 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
718 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
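/* Expansion example (derived from the definitions above): the masks
   accumulate down the architecture chain, e.g.

     FL_FOR_ARCH5TE == FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4
                       | FL_ARCH5 | FL_ARCH5E | FL_THUMB

   while FL_FOR_ARCH6M masks FL_NOTM back out because ARMv6-M provides
   only the 'M'-profile subset.  */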
719
720 /* The bits in this mask specify which
721 instructions we are allowed to generate. */
722 static unsigned long insn_flags = 0;
723
724 /* The bits in this mask specify which instruction scheduling options should
725 be used. */
726 static unsigned long tune_flags = 0;
727
728 /* The highest ARM architecture version supported by the
729 target. */
730 enum base_architecture arm_base_arch = BASE_ARCH_0;
731
732 /* The following are used in the arm.md file as equivalents to bits
733 in the above two flag variables. */
734
735 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
736 int arm_arch3m = 0;
737
738 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
739 int arm_arch4 = 0;
740
741 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
742 int arm_arch4t = 0;
743
744 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
745 int arm_arch5 = 0;
746
747 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
748 int arm_arch5e = 0;
749
750 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
751 int arm_arch6 = 0;
752
753 /* Nonzero if this chip supports the ARM 6K extensions. */
754 int arm_arch6k = 0;
755
756 /* Nonzero if this chip supports the ARM 7 extensions. */
757 int arm_arch7 = 0;
758
759 /* Nonzero if instructions not present in the 'M' profile can be used. */
760 int arm_arch_notm = 0;
761
762 /* Nonzero if instructions present in ARMv7E-M can be used. */
763 int arm_arch7em = 0;
764
765 /* Nonzero if this chip can benefit from load scheduling. */
766 int arm_ld_sched = 0;
767
768 /* Nonzero if this chip is a StrongARM. */
769 int arm_tune_strongarm = 0;
770
771 /* Nonzero if this chip supports Intel Wireless MMX technology. */
772 int arm_arch_iwmmxt = 0;
773
774 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
775 int arm_arch_iwmmxt2 = 0;
776
777 /* Nonzero if this chip is an XScale. */
778 int arm_arch_xscale = 0;
779
780 /* Nonzero if tuning for XScale */
781 int arm_tune_xscale = 0;
782
783 /* Nonzero if we want to tune for stores that access the write-buffer.
784 This typically means an ARM6 or ARM7 with MMU or MPU. */
785 int arm_tune_wbuf = 0;
786
787 /* Nonzero if tuning for Cortex-A9. */
788 int arm_tune_cortex_a9 = 0;
789
790 /* Nonzero if generating Thumb instructions. */
791 int thumb_code = 0;
792
793 /* Nonzero if generating Thumb-1 instructions. */
794 int thumb1_code = 0;
795
796 /* Nonzero if we should define __THUMB_INTERWORK__ in the
797 preprocessor.
798 XXX This is a bit of a hack; it's intended to help work around
799 problems in GLD, which doesn't understand that armv5t code is
800 interworking clean. */
801 int arm_cpp_interwork = 0;
802
803 /* Nonzero if chip supports Thumb 2. */
804 int arm_arch_thumb2;
805
806 /* Nonzero if chip supports integer division instruction. */
807 int arm_arch_arm_hwdiv;
808 int arm_arch_thumb_hwdiv;
809
810 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
811 we must report the mode of the memory reference from
812 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
813 enum machine_mode output_memory_reference_mode;
814
815 /* The register number to be used for the PIC offset register. */
816 unsigned arm_pic_register = INVALID_REGNUM;
817
818 /* Set to 1 after arm_reorg has started.  Reset to 0 at the start of
819 the next function. */
820 static int after_arm_reorg = 0;
821
822 enum arm_pcs arm_pcs_default;
823
824 /* For an explanation of these variables, see final_prescan_insn below. */
825 int arm_ccfsm_state;
826 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
827 enum arm_cond_code arm_current_cc;
828
829 rtx arm_target_insn;
830 int arm_target_label;
831 /* The number of conditionally executed insns, including the current insn. */
832 int arm_condexec_count = 0;
833 /* A bitmask specifying the patterns for the IT block.
834 Zero means do not output an IT block before this insn. */
835 int arm_condexec_mask = 0;
836 /* The number of bits used in arm_condexec_mask. */
837 int arm_condexec_masklen = 0;
838
839 /* The condition codes of the ARM, and the inverse function. */
840 static const char * const arm_condition_codes[] =
841 {
842 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
843 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
844 };
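/* Editorial note: the table is ordered so that each adjacent pair are
   logical inverses (eq/ne, cs/cc, mi/pl, vs/vc, hi/ls, ge/lt, gt/le),
   which lets a condition be inverted by flipping the low bit of its
   index; the last pair is "al" (always) and the deprecated "nv".  */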
845
846 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
847 int arm_regs_in_sequence[] =
848 {
849 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
850 };
851
852 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
853 #define streq(string1, string2) (strcmp (string1, string2) == 0)
854
855 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
856 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
857 | (1 << PIC_OFFSET_TABLE_REGNUM)))
858 \f
859 /* Initialization code. */
860
861 struct processors
862 {
863 const char *const name;
864 enum processor_type core;
865 const char *arch;
866 enum base_architecture base_arch;
867 const unsigned long flags;
868 const struct tune_params *const tune;
869 };
870
871
872 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
873 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
874 prefetch_slots, \
875 l1_size, \
876 l1_line_size
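/* Illustration (directly from the macros above): in a tune_params
   initializer, ARM_PREFETCH_BENEFICIAL(4,32,32) -- as used for the
   Cortex-A9 below -- supplies the three fields "4, 32, 32"
   (prefetch_slots, l1_size, l1_line_size), while
   ARM_PREFETCH_NOT_BENEFICIAL supplies "0, -1, -1".  */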
877
878 const struct tune_params arm_slowmul_tune =
879 {
880 arm_slowmul_rtx_costs,
881 NULL,
882 3, /* Constant limit. */
883 5, /* Max cond insns. */
884 ARM_PREFETCH_NOT_BENEFICIAL,
885 true, /* Prefer constant pool. */
886 arm_default_branch_cost,
887 false /* Prefer LDRD/STRD. */
888 };
889
890 const struct tune_params arm_fastmul_tune =
891 {
892 arm_fastmul_rtx_costs,
893 NULL,
894 1, /* Constant limit. */
895 5, /* Max cond insns. */
896 ARM_PREFETCH_NOT_BENEFICIAL,
897 true, /* Prefer constant pool. */
898 arm_default_branch_cost,
899 false /* Prefer LDRD/STRD. */
900 };
901
902 /* StrongARM has early execution of branches, so a sequence that is worth
903 skipping is shorter. Set max_insns_skipped to a lower value. */
904
905 const struct tune_params arm_strongarm_tune =
906 {
907 arm_fastmul_rtx_costs,
908 NULL,
909 1, /* Constant limit. */
910 3, /* Max cond insns. */
911 ARM_PREFETCH_NOT_BENEFICIAL,
912 true, /* Prefer constant pool. */
913 arm_default_branch_cost,
914 false /* Prefer LDRD/STRD. */
915 };
916
917 const struct tune_params arm_xscale_tune =
918 {
919 arm_xscale_rtx_costs,
920 xscale_sched_adjust_cost,
921 2, /* Constant limit. */
922 3, /* Max cond insns. */
923 ARM_PREFETCH_NOT_BENEFICIAL,
924 true, /* Prefer constant pool. */
925 arm_default_branch_cost,
926 false /* Prefer LDRD/STRD. */
927 };
928
929 const struct tune_params arm_9e_tune =
930 {
931 arm_9e_rtx_costs,
932 NULL,
933 1, /* Constant limit. */
934 5, /* Max cond insns. */
935 ARM_PREFETCH_NOT_BENEFICIAL,
936 true, /* Prefer constant pool. */
937 arm_default_branch_cost,
938 false /* Prefer LDRD/STRD. */
939 };
940
941 const struct tune_params arm_v6t2_tune =
942 {
943 arm_9e_rtx_costs,
944 NULL,
945 1, /* Constant limit. */
946 5, /* Max cond insns. */
947 ARM_PREFETCH_NOT_BENEFICIAL,
948 false, /* Prefer constant pool. */
949 arm_default_branch_cost,
950 false /* Prefer LDRD/STRD. */
951 };
952
953 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
954 const struct tune_params arm_cortex_tune =
955 {
956 arm_9e_rtx_costs,
957 NULL,
958 1, /* Constant limit. */
959 5, /* Max cond insns. */
960 ARM_PREFETCH_NOT_BENEFICIAL,
961 false, /* Prefer constant pool. */
962 arm_default_branch_cost,
963 false /* Prefer LDRD/STRD. */
964 };
965
966 const struct tune_params arm_cortex_a15_tune =
967 {
968 arm_9e_rtx_costs,
969 NULL,
970 1, /* Constant limit. */
971 5, /* Max cond insns. */
972 ARM_PREFETCH_NOT_BENEFICIAL,
973 false, /* Prefer constant pool. */
974 arm_default_branch_cost,
975 true /* Prefer LDRD/STRD. */
976 };
977
978 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
979 less appealing. Set max_insns_skipped to a low value. */
980
981 const struct tune_params arm_cortex_a5_tune =
982 {
983 arm_9e_rtx_costs,
984 NULL,
985 1, /* Constant limit. */
986 1, /* Max cond insns. */
987 ARM_PREFETCH_NOT_BENEFICIAL,
988 false, /* Prefer constant pool. */
989 arm_cortex_a5_branch_cost,
990 false /* Prefer LDRD/STRD. */
991 };
992
993 const struct tune_params arm_cortex_a9_tune =
994 {
995 arm_9e_rtx_costs,
996 cortex_a9_sched_adjust_cost,
997 1, /* Constant limit. */
998 5, /* Max cond insns. */
999 ARM_PREFETCH_BENEFICIAL(4,32,32),
1000 false, /* Prefer constant pool. */
1001 arm_default_branch_cost,
1002 false /* Prefer LDRD/STRD. */
1003 };
1004
1005 const struct tune_params arm_fa726te_tune =
1006 {
1007 arm_9e_rtx_costs,
1008 fa726te_sched_adjust_cost,
1009 1, /* Constant limit. */
1010 5, /* Max cond insns. */
1011 ARM_PREFETCH_NOT_BENEFICIAL,
1012 true, /* Prefer constant pool. */
1013 arm_default_branch_cost,
1014 false /* Prefer LDRD/STRD. */
1015 };
1016
1017
1018 /* Not all of these give usefully different compilation alternatives,
1019 but there is no simple way of generalizing them. */
1020 static const struct processors all_cores[] =
1021 {
1022 /* ARM Cores */
1023 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
1024 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1025 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1026 #include "arm-cores.def"
1027 #undef ARM_CORE
1028 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1029 };
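/* Expansion sketch (a hypothetical arm-cores.def entry, for illustration
   only): a line such as

     ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9)

   would expand via the macro above to

     {"cortex-a9", cortexa9, "7A", BASE_ARCH_7A,
      FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_a9_tune},

   i.e. the core name, its identifier, the architecture string and enum,
   the core flags merged with the architecture flags, and a pointer to the
   matching tuning table.  */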
1030
1031 static const struct processors all_architectures[] =
1032 {
1033 /* ARM Architectures */
1034 /* We don't specify tuning costs here as it will be figured out
1035 from the core. */
1036
1037 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1038 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1039 #include "arm-arches.def"
1040 #undef ARM_ARCH
1041 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1042 };
1043
1044
1045 /* These are populated as command-line arguments are processed, and remain
1046 NULL if not specified. */
1047 static const struct processors *arm_selected_arch;
1048 static const struct processors *arm_selected_cpu;
1049 static const struct processors *arm_selected_tune;
1050
1051 /* The name of the preprocessor macro to define for this architecture. */
1052
1053 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1054
1055 /* Available values for -mfpu=. */
1056
1057 static const struct arm_fpu_desc all_fpus[] =
1058 {
1059 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
1060 { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
1061 #include "arm-fpus.def"
1062 #undef ARM_FPU
1063 };
1064
1065
1066 /* Supported TLS relocations. */
1067
1068 enum tls_reloc {
1069 TLS_GD32,
1070 TLS_LDM32,
1071 TLS_LDO32,
1072 TLS_IE32,
1073 TLS_LE32,
1074 TLS_DESCSEQ /* GNU scheme */
1075 };
1076
1077 /* The maximum number of insns to be used when loading a constant. */
1078 inline static int
1079 arm_constant_limit (bool size_p)
1080 {
1081 return size_p ? 1 : current_tune->constant_limit;
1082 }
1083
1084 /* Emit an insn that's a simple single-set. Both the operands must be known
1085 to be valid. */
1086 inline static rtx
1087 emit_set_insn (rtx x, rtx y)
1088 {
1089 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1090 }
1091
1092 /* Return the number of bits set in VALUE. */
1093 static unsigned
1094 bit_count (unsigned long value)
1095 {
1096 unsigned long count = 0;
1097
1098 while (value)
1099 {
1100 count++;
1101 value &= value - 1; /* Clear the least-significant set bit. */
1102 }
1103
1104 return count;
1105 }
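/* Editorial note: this is the classic "clear the lowest set bit" counting
   loop; "value &= value - 1" removes one set bit per iteration.  For
   example, bit_count (0x29) sees 0x29 -> 0x28 -> 0x20 -> 0x00 and
   therefore returns 3.  */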
1106
1107 typedef struct
1108 {
1109 enum machine_mode mode;
1110 const char *name;
1111 } arm_fixed_mode_set;
1112
1113 /* A small helper for setting fixed-point library libfuncs. */
1114
1115 static void
1116 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1117 const char *funcname, const char *modename,
1118 int num_suffix)
1119 {
1120 char buffer[50];
1121
1122 if (num_suffix == 0)
1123 sprintf (buffer, "__gnu_%s%s", funcname, modename);
1124 else
1125 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
1126
1127 set_optab_libfunc (optable, mode, buffer);
1128 }
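/* Naming example (follows directly from the sprintf above): with the mode
   names used below,

     arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3);

   registers the libcall name "__gnu_addqq3" for QQmode addition, matching
   the __gnu_-prefixed fixed-point helpers referred to later in this
   file.  */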
1129
1130 static void
1131 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
1132 enum machine_mode from, const char *funcname,
1133 const char *toname, const char *fromname)
1134 {
1135 char buffer[50];
1136 const char *maybe_suffix_2 = "";
1137
1138 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1139 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
1140 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
1141 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
1142 maybe_suffix_2 = "2";
1143
1144 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
1145 maybe_suffix_2);
1146
1147 set_conv_libfunc (optable, to, from, buffer);
1148 }
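/* Naming example (editorial): converting between two signed fract modes,
   say QQmode -> HQmode, takes the "2" suffix and registers
   "__gnu_fractqqhq2"; a fixed-to-float conversion such as
   SQmode -> SFmode takes no suffix and registers "__gnu_fractsqsf".  */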
1149
1150 /* Set up library functions unique to ARM. */
1151
1152 static void
1153 arm_init_libfuncs (void)
1154 {
1155 /* For Linux, we have access to kernel support for atomic operations. */
1156 if (arm_abi == ARM_ABI_AAPCS_LINUX)
1157 init_sync_libfuncs (2 * UNITS_PER_WORD);
1158
1159 /* There are no special library functions unless we are using the
1160 ARM BPABI. */
1161 if (!TARGET_BPABI)
1162 return;
1163
1164 /* The functions below are described in Section 4 of the "Run-Time
1165 ABI for the ARM architecture", Version 1.0. */
1166
1167 /* Double-precision floating-point arithmetic. Table 2. */
1168 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1169 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1170 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1171 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1172 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1173
1174 /* Double-precision comparisons. Table 3. */
1175 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1176 set_optab_libfunc (ne_optab, DFmode, NULL);
1177 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1178 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1179 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1180 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1181 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1182
1183 /* Single-precision floating-point arithmetic. Table 4. */
1184 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1185 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1186 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1187 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1188 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1189
1190 /* Single-precision comparisons. Table 5. */
1191 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1192 set_optab_libfunc (ne_optab, SFmode, NULL);
1193 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1194 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1195 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1196 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1197 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1198
1199 /* Floating-point to integer conversions. Table 6. */
1200 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1201 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1202 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1203 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1204 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1205 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1206 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1207 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1208
1209 /* Conversions between floating types. Table 7. */
1210 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1211 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1212
1213 /* Integer to floating-point conversions. Table 8. */
1214 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1215 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1216 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1217 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1218 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1219 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1220 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1221 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1222
1223 /* Long long. Table 9. */
1224 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1225 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1226 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1227 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1228 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1229 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1230 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1231 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1232
1233 /* Integer (32/32->32) division. \S 4.3.1. */
1234 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1235 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1236
1237 /* The divmod functions are designed so that they can be used for
1238 plain division, even though they return both the quotient and the
1239 remainder. The quotient is returned in the usual location (i.e.,
1240 r0 for SImode, {r0, r1} for DImode), just as would be expected
1241 for an ordinary division routine. Because the AAPCS calling
1242 conventions specify that all of { r0, r1, r2, r3 } are
1243 call-clobbered registers, there is no need to tell the compiler
1244 explicitly that those registers are clobbered by these
1245 routines. */
1246 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1247 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1248
1249 /* For SImode division the ABI provides div-without-mod routines,
1250 which are faster. */
1251 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1252 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
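/* Editorial note on the divmod arrangement above: __aeabi_idivmod returns
   the quotient in r0 and the remainder in r1 (and __aeabi_ldivmod uses
   {r0,r1} / {r2,r3}), so for example

     int q = a / b;   // may become "bl __aeabi_idivmod"; result read from r0
     int r = a % b;   // same call; result read from r1

   and no extra clobber information is needed because r0-r3 are in any case
   not preserved across calls under the AAPCS.  */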
1253
1254 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1255 divmod libcalls instead. */
1256 set_optab_libfunc (smod_optab, DImode, NULL);
1257 set_optab_libfunc (umod_optab, DImode, NULL);
1258 set_optab_libfunc (smod_optab, SImode, NULL);
1259 set_optab_libfunc (umod_optab, SImode, NULL);
1260
1261 /* Half-precision float operations. The compiler handles all operations
1262 with NULL libfuncs by converting to SFmode. */
1263 switch (arm_fp16_format)
1264 {
1265 case ARM_FP16_FORMAT_IEEE:
1266 case ARM_FP16_FORMAT_ALTERNATIVE:
1267
1268 /* Conversions. */
1269 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1270 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1271 ? "__gnu_f2h_ieee"
1272 : "__gnu_f2h_alternative"));
1273 set_conv_libfunc (sext_optab, SFmode, HFmode,
1274 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1275 ? "__gnu_h2f_ieee"
1276 : "__gnu_h2f_alternative"));
1277
1278 /* Arithmetic. */
1279 set_optab_libfunc (add_optab, HFmode, NULL);
1280 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1281 set_optab_libfunc (smul_optab, HFmode, NULL);
1282 set_optab_libfunc (neg_optab, HFmode, NULL);
1283 set_optab_libfunc (sub_optab, HFmode, NULL);
1284
1285 /* Comparisons. */
1286 set_optab_libfunc (eq_optab, HFmode, NULL);
1287 set_optab_libfunc (ne_optab, HFmode, NULL);
1288 set_optab_libfunc (lt_optab, HFmode, NULL);
1289 set_optab_libfunc (le_optab, HFmode, NULL);
1290 set_optab_libfunc (ge_optab, HFmode, NULL);
1291 set_optab_libfunc (gt_optab, HFmode, NULL);
1292 set_optab_libfunc (unord_optab, HFmode, NULL);
1293 break;
1294
1295 default:
1296 break;
1297 }
1298
1299 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
1300 {
1301 const arm_fixed_mode_set fixed_arith_modes[] =
1302 {
1303 { QQmode, "qq" },
1304 { UQQmode, "uqq" },
1305 { HQmode, "hq" },
1306 { UHQmode, "uhq" },
1307 { SQmode, "sq" },
1308 { USQmode, "usq" },
1309 { DQmode, "dq" },
1310 { UDQmode, "udq" },
1311 { TQmode, "tq" },
1312 { UTQmode, "utq" },
1313 { HAmode, "ha" },
1314 { UHAmode, "uha" },
1315 { SAmode, "sa" },
1316 { USAmode, "usa" },
1317 { DAmode, "da" },
1318 { UDAmode, "uda" },
1319 { TAmode, "ta" },
1320 { UTAmode, "uta" }
1321 };
1322 const arm_fixed_mode_set fixed_conv_modes[] =
1323 {
1324 { QQmode, "qq" },
1325 { UQQmode, "uqq" },
1326 { HQmode, "hq" },
1327 { UHQmode, "uhq" },
1328 { SQmode, "sq" },
1329 { USQmode, "usq" },
1330 { DQmode, "dq" },
1331 { UDQmode, "udq" },
1332 { TQmode, "tq" },
1333 { UTQmode, "utq" },
1334 { HAmode, "ha" },
1335 { UHAmode, "uha" },
1336 { SAmode, "sa" },
1337 { USAmode, "usa" },
1338 { DAmode, "da" },
1339 { UDAmode, "uda" },
1340 { TAmode, "ta" },
1341 { UTAmode, "uta" },
1342 { QImode, "qi" },
1343 { HImode, "hi" },
1344 { SImode, "si" },
1345 { DImode, "di" },
1346 { TImode, "ti" },
1347 { SFmode, "sf" },
1348 { DFmode, "df" }
1349 };
1350 unsigned int i, j;
1351
1352 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
1353 {
1354 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
1355 "add", fixed_arith_modes[i].name, 3);
1356 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
1357 "ssadd", fixed_arith_modes[i].name, 3);
1358 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
1359 "usadd", fixed_arith_modes[i].name, 3);
1360 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
1361 "sub", fixed_arith_modes[i].name, 3);
1362 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
1363 "sssub", fixed_arith_modes[i].name, 3);
1364 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
1365 "ussub", fixed_arith_modes[i].name, 3);
1366 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
1367 "mul", fixed_arith_modes[i].name, 3);
1368 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
1369 "ssmul", fixed_arith_modes[i].name, 3);
1370 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
1371 "usmul", fixed_arith_modes[i].name, 3);
1372 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
1373 "div", fixed_arith_modes[i].name, 3);
1374 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
1375 "udiv", fixed_arith_modes[i].name, 3);
1376 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
1377 "ssdiv", fixed_arith_modes[i].name, 3);
1378 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
1379 "usdiv", fixed_arith_modes[i].name, 3);
1380 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
1381 "neg", fixed_arith_modes[i].name, 2);
1382 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
1383 "ssneg", fixed_arith_modes[i].name, 2);
1384 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
1385 "usneg", fixed_arith_modes[i].name, 2);
1386 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
1387 "ashl", fixed_arith_modes[i].name, 3);
1388 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
1389 "ashr", fixed_arith_modes[i].name, 3);
1390 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
1391 "lshr", fixed_arith_modes[i].name, 3);
1392 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
1393 "ssashl", fixed_arith_modes[i].name, 3);
1394 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
1395 "usashl", fixed_arith_modes[i].name, 3);
1396 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
1397 "cmp", fixed_arith_modes[i].name, 2);
1398 }
1399
1400 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
1401 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
1402 {
1403 if (i == j
1404 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
1405 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
1406 continue;
1407
1408 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
1409 fixed_conv_modes[j].mode, "fract",
1410 fixed_conv_modes[i].name,
1411 fixed_conv_modes[j].name);
1412 arm_set_fixed_conv_libfunc (satfract_optab,
1413 fixed_conv_modes[i].mode,
1414 fixed_conv_modes[j].mode, "satfract",
1415 fixed_conv_modes[i].name,
1416 fixed_conv_modes[j].name);
1417 arm_set_fixed_conv_libfunc (fractuns_optab,
1418 fixed_conv_modes[i].mode,
1419 fixed_conv_modes[j].mode, "fractuns",
1420 fixed_conv_modes[i].name,
1421 fixed_conv_modes[j].name);
1422 arm_set_fixed_conv_libfunc (satfractuns_optab,
1423 fixed_conv_modes[i].mode,
1424 fixed_conv_modes[j].mode, "satfractuns",
1425 fixed_conv_modes[i].name,
1426 fixed_conv_modes[j].name);
1427 }
1428 }
1429
1430 if (TARGET_AAPCS_BASED)
1431 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1432 }
1433
1434 /* On AAPCS systems, this is the "struct __va_list". */
1435 static GTY(()) tree va_list_type;
1436
1437 /* Return the type to use as __builtin_va_list. */
1438 static tree
1439 arm_build_builtin_va_list (void)
1440 {
1441 tree va_list_name;
1442 tree ap_field;
1443
1444 if (!TARGET_AAPCS_BASED)
1445 return std_build_builtin_va_list ();
1446
1447 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1448 defined as:
1449
1450 struct __va_list
1451 {
1452 void *__ap;
1453 };
1454
1455 The C Library ABI further reinforces this definition in \S
1456 4.1.
1457
1458 We must follow this definition exactly. The structure tag
1459 name is visible in C++ mangled names, and thus forms a part
1460 of the ABI. The field name may be used by people who
1461 #include <stdarg.h>. */
1462 /* Create the type. */
1463 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1464 /* Give it the required name. */
1465 va_list_name = build_decl (BUILTINS_LOCATION,
1466 TYPE_DECL,
1467 get_identifier ("__va_list"),
1468 va_list_type);
1469 DECL_ARTIFICIAL (va_list_name) = 1;
1470 TYPE_NAME (va_list_type) = va_list_name;
1471 TYPE_STUB_DECL (va_list_type) = va_list_name;
1472 /* Create the __ap field. */
1473 ap_field = build_decl (BUILTINS_LOCATION,
1474 FIELD_DECL,
1475 get_identifier ("__ap"),
1476 ptr_type_node);
1477 DECL_ARTIFICIAL (ap_field) = 1;
1478 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1479 TYPE_FIELDS (va_list_type) = ap_field;
1480 /* Compute its layout. */
1481 layout_type (va_list_type);
1482
1483 return va_list_type;
1484 }
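/* Editorial note (assumption about the C++ mangling, for illustration):
   because the tag is ABI-visible, a declaration like

     void log_args (const char *fmt, va_list ap);

   mangles its va_list parameter as "St9__va_list" (std::__va_list) on
   AAPCS targets rather than as a plain pointer type, which is why the
   structure must be laid out exactly as described above.  */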
1485
1486 /* Return an expression of type "void *" pointing to the next
1487 available argument in a variable-argument list. VALIST is the
1488 user-level va_list object, of type __builtin_va_list. */
1489 static tree
1490 arm_extract_valist_ptr (tree valist)
1491 {
1492 if (TREE_TYPE (valist) == error_mark_node)
1493 return error_mark_node;
1494
1495 /* On an AAPCS target, the pointer is stored within "struct
1496 va_list". */
1497 if (TARGET_AAPCS_BASED)
1498 {
1499 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1500 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1501 valist, ap_field, NULL_TREE);
1502 }
1503
1504 return valist;
1505 }
1506
1507 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1508 static void
1509 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1510 {
1511 valist = arm_extract_valist_ptr (valist);
1512 std_expand_builtin_va_start (valist, nextarg);
1513 }
1514
1515 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1516 static tree
1517 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1518 gimple_seq *post_p)
1519 {
1520 valist = arm_extract_valist_ptr (valist);
1521 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1522 }
1523
1524 /* Fix up any incompatible options that the user has specified. */
1525 static void
1526 arm_option_override (void)
1527 {
1528 if (global_options_set.x_arm_arch_option)
1529 arm_selected_arch = &all_architectures[arm_arch_option];
1530
1531 if (global_options_set.x_arm_cpu_option)
1532 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
1533
1534 if (global_options_set.x_arm_tune_option)
1535 arm_selected_tune = &all_cores[(int) arm_tune_option];
1536
1537 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1538 SUBTARGET_OVERRIDE_OPTIONS;
1539 #endif
1540
1541 if (arm_selected_arch)
1542 {
1543 if (arm_selected_cpu)
1544 {
1545 /* Check for conflict between mcpu and march. */
1546 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1547 {
1548 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1549 arm_selected_cpu->name, arm_selected_arch->name);
1550 /* -march wins for code generation.
1551 -mcpu wins for default tuning. */
1552 if (!arm_selected_tune)
1553 arm_selected_tune = arm_selected_cpu;
1554
1555 arm_selected_cpu = arm_selected_arch;
1556 }
1557 else
1558 /* -mcpu wins. */
1559 arm_selected_arch = NULL;
1560 }
1561 else
1562 /* Pick a CPU based on the architecture. */
1563 arm_selected_cpu = arm_selected_arch;
1564 }
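/* For example, combining -mcpu=X with an -march value that lacks some of
   X's ISA features triggers the warning above; -march then wins for code
   generation while X is kept for tuning, unless -mtune was also given.
   (X stands for any core name from all_cores; this is only an
   illustration of the rule described in the comments above.)  */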
1565
1566 /* If the user did not specify a processor, choose one for them. */
1567 if (!arm_selected_cpu)
1568 {
1569 const struct processors * sel;
1570 unsigned int sought;
1571
1572 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1573 if (!arm_selected_cpu->name)
1574 {
1575 #ifdef SUBTARGET_CPU_DEFAULT
1576 /* Use the subtarget default CPU if none was specified by
1577 configure. */
1578 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1579 #endif
1580 /* Default to ARM6. */
1581 if (!arm_selected_cpu->name)
1582 arm_selected_cpu = &all_cores[arm6];
1583 }
1584
1585 sel = arm_selected_cpu;
1586 insn_flags = sel->flags;
1587
1588 /* Now check to see if the user has specified some command line
1589 switches that require certain abilities from the cpu. */
1590 sought = 0;
1591
1592 if (TARGET_INTERWORK || TARGET_THUMB)
1593 {
1594 sought |= (FL_THUMB | FL_MODE32);
1595
1596 /* There are no ARM processors that support both APCS-26 and
1597 interworking. Therefore we force FL_MODE26 to be removed
1598 from insn_flags here (if it was set), so that the search
1599 below will always be able to find a compatible processor. */
1600 insn_flags &= ~FL_MODE26;
1601 }
1602
1603 if (sought != 0 && ((sought & insn_flags) != sought))
1604 {
1605 /* Try to locate a CPU type that supports all of the abilities
1606 of the default CPU, plus the extra abilities requested by
1607 the user. */
1608 for (sel = all_cores; sel->name != NULL; sel++)
1609 if ((sel->flags & sought) == (sought | insn_flags))
1610 break;
1611
1612 if (sel->name == NULL)
1613 {
1614 unsigned current_bit_count = 0;
1615 const struct processors * best_fit = NULL;
1616
1617 /* Ideally we would like to issue an error message here
1618 saying that it was not possible to find a CPU compatible
1619 with the default CPU, but which also supports the command
1620 line options specified by the programmer, and so they
1621 ought to use the -mcpu=<name> command line option to
1622 override the default CPU type.
1623
1624 If we cannot find a cpu that has both the
1625 characteristics of the default cpu and the given
1626 command line options we scan the array again looking
1627 for a best match. */
1628 for (sel = all_cores; sel->name != NULL; sel++)
1629 if ((sel->flags & sought) == sought)
1630 {
1631 unsigned count;
1632
1633 count = bit_count (sel->flags & insn_flags);
1634
1635 if (count >= current_bit_count)
1636 {
1637 best_fit = sel;
1638 current_bit_count = count;
1639 }
1640 }
1641
1642 gcc_assert (best_fit);
1643 sel = best_fit;
1644 }
1645
1646 arm_selected_cpu = sel;
1647 }
1648 }
1649
1650 gcc_assert (arm_selected_cpu);
1651 /* The selected cpu may be an architecture, so look up tuning by core ID. */
1652 if (!arm_selected_tune)
1653 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1654
1655 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1656 insn_flags = arm_selected_cpu->flags;
1657 arm_base_arch = arm_selected_cpu->base_arch;
1658
1659 arm_tune = arm_selected_tune->core;
1660 tune_flags = arm_selected_tune->flags;
1661 current_tune = arm_selected_tune->tune;
1662
1663 /* Make sure that the processor choice does not conflict with any of the
1664 other command line choices. */
1665 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1666 error ("target CPU does not support ARM mode");
1667
1668 /* BPABI targets use linker tricks to allow interworking on cores
1669 without thumb support. */
1670 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1671 {
1672 warning (0, "target CPU does not support interworking");
1673 target_flags &= ~MASK_INTERWORK;
1674 }
1675
1676 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1677 {
1678 warning (0, "target CPU does not support THUMB instructions");
1679 target_flags &= ~MASK_THUMB;
1680 }
1681
1682 if (TARGET_APCS_FRAME && TARGET_THUMB)
1683 {
1684 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1685 target_flags &= ~MASK_APCS_FRAME;
1686 }
1687
1688 /* Callee super interworking implies thumb interworking. Adding
1689 this to the flags here simplifies the logic elsewhere. */
1690 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1691 target_flags |= MASK_INTERWORK;
1692
1693 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1694 from here where no function is being compiled currently. */
1695 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1696 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1697
1698 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1699 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1700
1701 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1702 {
1703 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1704 target_flags |= MASK_APCS_FRAME;
1705 }
1706
1707 if (TARGET_POKE_FUNCTION_NAME)
1708 target_flags |= MASK_APCS_FRAME;
1709
1710 if (TARGET_APCS_REENT && flag_pic)
1711 error ("-fpic and -mapcs-reent are incompatible");
1712
1713 if (TARGET_APCS_REENT)
1714 warning (0, "APCS reentrant code not supported. Ignored");
1715
1716 /* If this target is normally configured to use APCS frames, warn if they
1717 are turned off and debugging is turned on. */
1718 if (TARGET_ARM
1719 && write_symbols != NO_DEBUG
1720 && !TARGET_APCS_FRAME
1721 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1722 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1723
1724 if (TARGET_APCS_FLOAT)
1725 warning (0, "passing floating point arguments in fp regs not yet supported");
1726
1727 if (TARGET_LITTLE_WORDS)
1728 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
1729 "will be removed in a future release");
1730
1731 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1732 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1733 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1734 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1735 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1736 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1737 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1738 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1739 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1740 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1741 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1742 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1743 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1744
1745 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1746 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1747 thumb_code = TARGET_ARM == 0;
1748 thumb1_code = TARGET_THUMB1 != 0;
1749 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1750 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1751 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1752 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
1753 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1754 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1755 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1756
1757 /* If we are not using the default (ARM mode) section anchor offset
1758 ranges, then set the correct ranges now. */
1759 if (TARGET_THUMB1)
1760 {
1761 /* Thumb-1 LDR instructions cannot have negative offsets.
1762 Permissible positive offset ranges are 5-bit (for byte loads),
1763 6-bit (for halfword loads), or 7-bit (for word loads).
1764 Empirical results suggest a 7-bit anchor range gives the best
1765 overall code size. */
1766 targetm.min_anchor_offset = 0;
1767 targetm.max_anchor_offset = 127;
1768 }
1769 else if (TARGET_THUMB2)
1770 {
1771 /* The minimum is set such that the total size of the block
1772 for a particular anchor is 248 + 1 + 4095 bytes, which is
1773 divisible by eight, ensuring natural spacing of anchors. */
1774 targetm.min_anchor_offset = -248;
1775 targetm.max_anchor_offset = 4095;
1776 }
1777
1778 /* V5 code we generate is completely interworking capable, so we turn off
1779 TARGET_INTERWORK here to avoid many tests later on. */
1780
1781 /* XXX However, we must pass the right pre-processor defines to CPP
1782 or GLD can get confused. This is a hack. */
1783 if (TARGET_INTERWORK)
1784 arm_cpp_interwork = 1;
1785
1786 if (arm_arch5)
1787 target_flags &= ~MASK_INTERWORK;
1788
1789 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1790 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1791
1792 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1793 error ("iwmmxt abi requires an iwmmxt capable cpu");
1794
1795 if (!global_options_set.x_arm_fpu_index)
1796 {
1797 const char *target_fpu_name;
1798 bool ok;
1799
1800 #ifdef FPUTYPE_DEFAULT
1801 target_fpu_name = FPUTYPE_DEFAULT;
1802 #else
1803 target_fpu_name = "vfp";
1804 #endif
1805
1806 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1807 CL_TARGET);
1808 gcc_assert (ok);
1809 }
1810
1811 arm_fpu_desc = &all_fpus[arm_fpu_index];
1812
1813 switch (arm_fpu_desc->model)
1814 {
1815 case ARM_FP_MODEL_VFP:
1816 arm_fpu_attr = FPU_VFP;
1817 break;
1818
1819 default:
1820 gcc_unreachable();
1821 }
1822
1823 if (TARGET_AAPCS_BASED)
1824 {
1825 if (TARGET_CALLER_INTERWORKING)
1826 error ("AAPCS does not support -mcaller-super-interworking");
1827 else
1828 if (TARGET_CALLEE_INTERWORKING)
1829 error ("AAPCS does not support -mcallee-super-interworking");
1830 }
1831
1832 /* iWMMXt and NEON are incompatible. */
1833 if (TARGET_IWMMXT && TARGET_NEON)
1834 error ("iWMMXt and NEON are incompatible");
1835
1836 /* iWMMXt unsupported under Thumb mode. */
1837 if (TARGET_THUMB && TARGET_IWMMXT)
1838 error ("iWMMXt unsupported under Thumb mode");
1839
1840 /* __fp16 support currently assumes the core has ldrh. */
1841 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1842 sorry ("__fp16 and no ldrh");
1843
1844 /* If soft-float is specified then don't use FPU. */
1845 if (TARGET_SOFT_FLOAT)
1846 arm_fpu_attr = FPU_NONE;
1847
1848 if (TARGET_AAPCS_BASED)
1849 {
1850 if (arm_abi == ARM_ABI_IWMMXT)
1851 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1852 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1853 && TARGET_HARD_FLOAT
1854 && TARGET_VFP)
1855 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1856 else
1857 arm_pcs_default = ARM_PCS_AAPCS;
1858 }
1859 else
1860 {
1861 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1862 sorry ("-mfloat-abi=hard and VFP");
1863
1864 if (arm_abi == ARM_ABI_APCS)
1865 arm_pcs_default = ARM_PCS_APCS;
1866 else
1867 arm_pcs_default = ARM_PCS_ATPCS;
1868 }
1869
1870 /* For arm2/3 there is no need to do any scheduling if we are doing
1871 software floating-point. */
1872 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
1873 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1874
1875 /* Use the cp15 method if it is available. */
1876 if (target_thread_pointer == TP_AUTO)
1877 {
1878 if (arm_arch6k && !TARGET_THUMB1)
1879 target_thread_pointer = TP_CP15;
1880 else
1881 target_thread_pointer = TP_SOFT;
1882 }
1883
1884 if (TARGET_HARD_TP && TARGET_THUMB1)
1885 error ("can not use -mtp=cp15 with 16-bit Thumb");
1886
1887 /* Override the default structure alignment for AAPCS ABI. */
1888 if (!global_options_set.x_arm_structure_size_boundary)
1889 {
1890 if (TARGET_AAPCS_BASED)
1891 arm_structure_size_boundary = 8;
1892 }
1893 else
1894 {
1895 if (arm_structure_size_boundary != 8
1896 && arm_structure_size_boundary != 32
1897 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
1898 {
1899 if (ARM_DOUBLEWORD_ALIGN)
1900 warning (0,
1901 "structure size boundary can only be set to 8, 32 or 64");
1902 else
1903 warning (0, "structure size boundary can only be set to 8 or 32");
1904 arm_structure_size_boundary
1905 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
1906 }
1907 }
1908
1909 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1910 {
1911 error ("RTP PIC is incompatible with Thumb");
1912 flag_pic = 0;
1913 }
1914
1915 /* If stack checking is disabled, we can use r10 as the PIC register,
1916 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1917 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1918 {
1919 if (TARGET_VXWORKS_RTP)
1920 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1921 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1922 }
1923
1924 if (flag_pic && TARGET_VXWORKS_RTP)
1925 arm_pic_register = 9;
1926
1927 if (arm_pic_register_string != NULL)
1928 {
1929 int pic_register = decode_reg_name (arm_pic_register_string);
1930
1931 if (!flag_pic)
1932 warning (0, "-mpic-register= is useless without -fpic");
1933
1934 /* Prevent the user from choosing an obviously stupid PIC register. */
1935 else if (pic_register < 0 || call_used_regs[pic_register]
1936 || pic_register == HARD_FRAME_POINTER_REGNUM
1937 || pic_register == STACK_POINTER_REGNUM
1938 || pic_register >= PC_REGNUM
1939 || (TARGET_VXWORKS_RTP
1940 && (unsigned int) pic_register != arm_pic_register))
1941 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1942 else
1943 arm_pic_register = pic_register;
1944 }
1945
1946 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1947 if (fix_cm3_ldrd == 2)
1948 {
1949 if (arm_selected_cpu->core == cortexm3)
1950 fix_cm3_ldrd = 1;
1951 else
1952 fix_cm3_ldrd = 0;
1953 }
1954
1955 /* Enable -munaligned-access by default for
1956 - all ARMv6 architecture-based processors
1957 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
1958
1959 Disable -munaligned-access by default for
1960 - all pre-ARMv6 architecture-based processors
1961 - ARMv6-M architecture-based processors. */
1962
1963 if (unaligned_access == 2)
1964 {
1965 if (arm_arch6 && (arm_arch_notm || arm_arch7))
1966 unaligned_access = 1;
1967 else
1968 unaligned_access = 0;
1969 }
1970 else if (unaligned_access == 1
1971 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
1972 {
1973 warning (0, "target CPU does not support unaligned accesses");
1974 unaligned_access = 0;
1975 }
1976
1977 if (TARGET_THUMB1 && flag_schedule_insns)
1978 {
1979 /* Don't warn since it's on by default in -O2. */
1980 flag_schedule_insns = 0;
1981 }
1982
1983 if (optimize_size)
1984 {
1985 /* If optimizing for size, bump the number of instructions that we
1986 are prepared to conditionally execute (even on a StrongARM). */
1987 max_insns_skipped = 6;
1988 }
1989 else
1990 max_insns_skipped = current_tune->max_insns_skipped;
1991
1992 /* Hot/Cold partitioning is not currently supported, since we can't
1993 handle literal pool placement in that case. */
1994 if (flag_reorder_blocks_and_partition)
1995 {
1996 inform (input_location,
1997 "-freorder-blocks-and-partition not supported on this architecture");
1998 flag_reorder_blocks_and_partition = 0;
1999 flag_reorder_blocks = 1;
2000 }
2001
2002 if (flag_pic)
2003 /* Hoisting PIC address calculations more aggressively provides a small,
2004 but measurable, size reduction for PIC code. Therefore, we decrease
2005 the bar for unrestricted expression hoisting to the cost of PIC address
2006 calculation, which is 2 instructions. */
2007 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2008 global_options.x_param_values,
2009 global_options_set.x_param_values);
2010
2011 /* ARM EABI defaults to strict volatile bitfields. */
2012 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2013 && abi_version_at_least(2))
2014 flag_strict_volatile_bitfields = 1;
2015
2016 /* Enable software prefetching at -O3 for CPUs that have prefetch and where we
2017 have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2018 if (flag_prefetch_loop_arrays < 0
2019 && HAVE_prefetch
2020 && optimize >= 3
2021 && current_tune->num_prefetch_slots > 0)
2022 flag_prefetch_loop_arrays = 1;
2023
2024 /* Set up parameters to be used in prefetching algorithm. Do not override the
2025 defaults unless we are tuning for a core we have researched values for. */
2026 if (current_tune->num_prefetch_slots > 0)
2027 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2028 current_tune->num_prefetch_slots,
2029 global_options.x_param_values,
2030 global_options_set.x_param_values);
2031 if (current_tune->l1_cache_line_size >= 0)
2032 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2033 current_tune->l1_cache_line_size,
2034 global_options.x_param_values,
2035 global_options_set.x_param_values);
2036 if (current_tune->l1_cache_size >= 0)
2037 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2038 current_tune->l1_cache_size,
2039 global_options.x_param_values,
2040 global_options_set.x_param_values);
2041
2042 /* Use the alternative scheduling-pressure algorithm by default. */
2043 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2044 global_options.x_param_values,
2045 global_options_set.x_param_values);
2046
2047 /* Register global variables with the garbage collector. */
2048 arm_add_gc_roots ();
2049 }
2050
2051 static void
2052 arm_add_gc_roots (void)
2053 {
2054 gcc_obstack_init(&minipool_obstack);
2055 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2056 }
2057 \f
2058 /* A table of known ARM exception types.
2059 For use with the interrupt function attribute. */
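/* For example, a handler declared as

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   is matched against this table by arm_isr_value below and picks up
   the ARM_FT_ISR function type.  */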
2060
2061 typedef struct
2062 {
2063 const char *const arg;
2064 const unsigned long return_value;
2065 }
2066 isr_attribute_arg;
2067
2068 static const isr_attribute_arg isr_attribute_args [] =
2069 {
2070 { "IRQ", ARM_FT_ISR },
2071 { "irq", ARM_FT_ISR },
2072 { "FIQ", ARM_FT_FIQ },
2073 { "fiq", ARM_FT_FIQ },
2074 { "ABORT", ARM_FT_ISR },
2075 { "abort", ARM_FT_ISR },
2076 { "ABORT", ARM_FT_ISR },
2077 { "abort", ARM_FT_ISR },
2078 { "UNDEF", ARM_FT_EXCEPTION },
2079 { "undef", ARM_FT_EXCEPTION },
2080 { "SWI", ARM_FT_EXCEPTION },
2081 { "swi", ARM_FT_EXCEPTION },
2082 { NULL, ARM_FT_NORMAL }
2083 };
2084
2085 /* Returns the (interrupt) function type of the current
2086 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2087
2088 static unsigned long
2089 arm_isr_value (tree argument)
2090 {
2091 const isr_attribute_arg * ptr;
2092 const char * arg;
2093
2094 if (!arm_arch_notm)
2095 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2096
2097 /* No argument - default to IRQ. */
2098 if (argument == NULL_TREE)
2099 return ARM_FT_ISR;
2100
2101 /* Get the value of the argument. */
2102 if (TREE_VALUE (argument) == NULL_TREE
2103 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2104 return ARM_FT_UNKNOWN;
2105
2106 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2107
2108 /* Check it against the list of known arguments. */
2109 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2110 if (streq (arg, ptr->arg))
2111 return ptr->return_value;
2112
2113 /* An unrecognized interrupt type. */
2114 return ARM_FT_UNKNOWN;
2115 }
2116
2117 /* Computes the type of the current function. */
2118
2119 static unsigned long
2120 arm_compute_func_type (void)
2121 {
2122 unsigned long type = ARM_FT_UNKNOWN;
2123 tree a;
2124 tree attr;
2125
2126 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2127
2128 /* Decide if the current function is volatile. Such functions
2129 never return, and many memory cycles can be saved by not storing
2130 register values that will never be needed again. This optimization
2131 was added to speed up context switching in a kernel application. */
2132 if (optimize > 0
2133 && (TREE_NOTHROW (current_function_decl)
2134 || !(flag_unwind_tables
2135 || (flag_exceptions
2136 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2137 && TREE_THIS_VOLATILE (current_function_decl))
2138 type |= ARM_FT_VOLATILE;
2139
2140 if (cfun->static_chain_decl != NULL)
2141 type |= ARM_FT_NESTED;
2142
2143 attr = DECL_ATTRIBUTES (current_function_decl);
2144
2145 a = lookup_attribute ("naked", attr);
2146 if (a != NULL_TREE)
2147 type |= ARM_FT_NAKED;
2148
2149 a = lookup_attribute ("isr", attr);
2150 if (a == NULL_TREE)
2151 a = lookup_attribute ("interrupt", attr);
2152
2153 if (a == NULL_TREE)
2154 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2155 else
2156 type |= arm_isr_value (TREE_VALUE (a));
2157
2158 return type;
2159 }
2160
2161 /* Returns the type of the current function. */
2162
2163 unsigned long
2164 arm_current_func_type (void)
2165 {
2166 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2167 cfun->machine->func_type = arm_compute_func_type ();
2168
2169 return cfun->machine->func_type;
2170 }
2171
2172 bool
2173 arm_allocate_stack_slots_for_args (void)
2174 {
2175 /* Naked functions should not allocate stack slots for arguments. */
2176 return !IS_NAKED (arm_current_func_type ());
2177 }
2178
2179 static bool
2180 arm_warn_func_return (tree decl)
2181 {
2182 /* Naked functions are implemented entirely in assembly, including the
2183 return sequence, so suppress warnings about this. */
2184 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
2185 }
2186
2187 \f
2188 /* Output assembler code for a block containing the constant parts
2189 of a trampoline, leaving space for the variable parts.
2190
2191 On the ARM (if r8 is the static chain regnum, and remembering that
2192 referencing pc adds an offset of 8), the trampoline looks like:
2193 ldr r8, [pc, #0]
2194 ldr pc, [pc]
2195 .word static chain value
2196 .word function's address
2197 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
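/* As an illustration (addresses hypothetical): once arm_trampoline_init
   below has filled in the variable words, an ARM-mode trampoline at
   address A reads:
     A+0:  ldr r8, [pc, #0]   @ pc reads as A+8, so this loads A+8
     A+4:  ldr pc, [pc, #0]   @ pc reads as A+12, so this jumps via A+12
     A+8:  <static chain value>
     A+12: <address of the nested function>  */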
2198
2199 static void
2200 arm_asm_trampoline_template (FILE *f)
2201 {
2202 if (TARGET_ARM)
2203 {
2204 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2205 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2206 }
2207 else if (TARGET_THUMB2)
2208 {
2209 /* The Thumb-2 trampoline is similar to the arm implementation.
2210 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2211 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2212 STATIC_CHAIN_REGNUM, PC_REGNUM);
2213 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2214 }
2215 else
2216 {
2217 ASM_OUTPUT_ALIGN (f, 2);
2218 fprintf (f, "\t.code\t16\n");
2219 fprintf (f, ".Ltrampoline_start:\n");
2220 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2221 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2222 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2223 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2224 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2225 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2226 }
2227 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2228 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2229 }
2230
2231 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2232
2233 static void
2234 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2235 {
2236 rtx fnaddr, mem, a_tramp;
2237
2238 emit_block_move (m_tramp, assemble_trampoline_template (),
2239 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2240
2241 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2242 emit_move_insn (mem, chain_value);
2243
2244 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2245 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2246 emit_move_insn (mem, fnaddr);
2247
2248 a_tramp = XEXP (m_tramp, 0);
2249 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2250 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2251 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
2252 }
2253
2254 /* Thumb trampolines should be entered in thumb mode, so set
2255 the bottom bit of the address. */
2256
2257 static rtx
2258 arm_trampoline_adjust_address (rtx addr)
2259 {
2260 if (TARGET_THUMB)
2261 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2262 NULL, 0, OPTAB_LIB_WIDEN);
2263 return addr;
2264 }
2265 \f
2266 /* Return 1 if it is possible to return using a single instruction.
2267 If SIBLING is non-null, this is a test for a return before a sibling
2268 call. SIBLING is the call insn, so we can examine its register usage. */
2269
2270 int
2271 use_return_insn (int iscond, rtx sibling)
2272 {
2273 int regno;
2274 unsigned int func_type;
2275 unsigned long saved_int_regs;
2276 unsigned HOST_WIDE_INT stack_adjust;
2277 arm_stack_offsets *offsets;
2278
2279 /* Never use a return instruction before reload has run. */
2280 if (!reload_completed)
2281 return 0;
2282
2283 func_type = arm_current_func_type ();
2284
2285 /* Naked, volatile and stack alignment functions need special
2286 consideration. */
2287 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2288 return 0;
2289
2290 /* So do interrupt functions that use the frame pointer and Thumb
2291 interrupt functions. */
2292 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2293 return 0;
2294
2295 offsets = arm_get_frame_offsets ();
2296 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2297
2298 /* As do variadic functions. */
2299 if (crtl->args.pretend_args_size
2300 || cfun->machine->uses_anonymous_args
2301 /* Or if the function calls __builtin_eh_return () */
2302 || crtl->calls_eh_return
2303 /* Or if the function calls alloca */
2304 || cfun->calls_alloca
2305 /* Or if there is a stack adjustment. However, if the stack pointer
2306 is saved on the stack, we can use a pre-incrementing stack load. */
2307 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2308 && stack_adjust == 4)))
2309 return 0;
2310
2311 saved_int_regs = offsets->saved_regs_mask;
2312
2313 /* Unfortunately, the insn
2314
2315 ldmib sp, {..., sp, ...}
2316
2317 triggers a bug on most SA-110 based devices, such that the stack
2318 pointer won't be correctly restored if the instruction takes a
2319 page fault. We work around this problem by popping r3 along with
2320 the other registers, since that is never slower than executing
2321 another instruction.
2322
2323 We test for !arm_arch5 here, because code for any architecture
2324 less than this could potentially be run on one of the buggy
2325 chips. */
2326 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2327 {
2328 /* Validate that r3 is a call-clobbered register (always true in
2329 the default abi) ... */
2330 if (!call_used_regs[3])
2331 return 0;
2332
2333 /* ... that it isn't being used for a return value ... */
2334 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2335 return 0;
2336
2337 /* ... or for a tail-call argument ... */
2338 if (sibling)
2339 {
2340 gcc_assert (CALL_P (sibling));
2341
2342 if (find_regno_fusage (sibling, USE, 3))
2343 return 0;
2344 }
2345
2346 /* ... and that there are no call-saved registers in r0-r2
2347 (always true in the default ABI). */
2348 if (saved_int_regs & 0x7)
2349 return 0;
2350 }
2351
2352 /* Can't be done if interworking with Thumb, and any registers have been
2353 stacked. */
2354 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2355 return 0;
2356
2357 /* On StrongARM, conditional returns are expensive if they aren't
2358 taken and multiple registers have been stacked. */
2359 if (iscond && arm_tune_strongarm)
2360 {
2361 /* Conditional return when just the LR is stored is a simple
2362 conditional-load instruction, that's not expensive. */
2363 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2364 return 0;
2365
2366 if (flag_pic
2367 && arm_pic_register != INVALID_REGNUM
2368 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2369 return 0;
2370 }
2371
2372 /* If there are saved registers but the LR isn't saved, then we need
2373 two instructions for the return. */
2374 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2375 return 0;
2376
2377 /* Can't be done if any of the VFP regs are pushed,
2378 since this also requires an insn. */
2379 if (TARGET_HARD_FLOAT && TARGET_VFP)
2380 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2381 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2382 return 0;
2383
2384 if (TARGET_REALLY_IWMMXT)
2385 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2386 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2387 return 0;
2388
2389 return 1;
2390 }
2391
2392 /* Return TRUE if int I is a valid immediate ARM constant. */
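/* (In ARM state a data-processing immediate is an 8-bit value rotated
   right by an even amount, so e.g. 0x000000ff, 0x0003fc00 and 0xff000000
   are representable while 0x0000ffff and 0x00000101 are not.  Thumb-2
   additionally accepts the replicated byte patterns 0x00XY00XY,
   0xXY00XY00 and 0xXYXYXYXY that are checked for below.)  */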
2393
2394 int
2395 const_ok_for_arm (HOST_WIDE_INT i)
2396 {
2397 int lowbit;
2398
2399 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2400 be all zero, or all one. */
2401 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2402 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2403 != ((~(unsigned HOST_WIDE_INT) 0)
2404 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2405 return FALSE;
2406
2407 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2408
2409 /* Fast return for 0 and small values. We must do this for zero, since
2410 the code below can't handle that one case. */
2411 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2412 return TRUE;
2413
2414 /* Get the number of trailing zeros. */
2415 lowbit = ffs((int) i) - 1;
2416
2417 /* Only even shifts are allowed in ARM mode so round down to the
2418 nearest even number. */
2419 if (TARGET_ARM)
2420 lowbit &= ~1;
2421
2422 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2423 return TRUE;
2424
2425 if (TARGET_ARM)
2426 {
2427 /* Allow rotated constants in ARM mode. */
2428 if (lowbit <= 4
2429 && ((i & ~0xc000003f) == 0
2430 || (i & ~0xf000000f) == 0
2431 || (i & ~0xfc000003) == 0))
2432 return TRUE;
2433 }
2434 else
2435 {
2436 HOST_WIDE_INT v;
2437
2438 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2439 v = i & 0xff;
2440 v |= v << 16;
2441 if (i == v || i == (v | (v << 8)))
2442 return TRUE;
2443
2444 /* Allow repeated pattern 0xXY00XY00. */
2445 v = i & 0xff00;
2446 v |= v << 16;
2447 if (i == v)
2448 return TRUE;
2449 }
2450
2451 return FALSE;
2452 }
2453
2454 /* Return true if I is a valid constant for the operation CODE. */
2455 int
2456 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2457 {
2458 if (const_ok_for_arm (i))
2459 return 1;
2460
2461 switch (code)
2462 {
2463 case SET:
2464 /* See if we can use movw. */
2465 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2466 return 1;
2467 else
2468 /* Otherwise, try mvn. */
2469 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2470
2471 case PLUS:
2472 /* See if we can use addw or subw. */
2473 if (TARGET_THUMB2
2474 && ((i & 0xfffff000) == 0
2475 || ((-i) & 0xfffff000) == 0))
2476 return 1;
2477 /* else fall through. */
2478
2479 case COMPARE:
2480 case EQ:
2481 case NE:
2482 case GT:
2483 case LE:
2484 case LT:
2485 case GE:
2486 case GEU:
2487 case LTU:
2488 case GTU:
2489 case LEU:
2490 case UNORDERED:
2491 case ORDERED:
2492 case UNEQ:
2493 case UNGE:
2494 case UNLT:
2495 case UNGT:
2496 case UNLE:
2497 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2498
2499 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2500 case XOR:
2501 return 0;
2502
2503 case IOR:
2504 if (TARGET_THUMB2)
2505 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2506 return 0;
2507
2508 case AND:
2509 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2510
2511 default:
2512 gcc_unreachable ();
2513 }
2514 }
2515
2516 /* Return true if I is a valid di mode constant for the operation CODE. */
2517 int
2518 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
2519 {
2520 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
2521 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
2522 rtx hi = GEN_INT (hi_val);
2523 rtx lo = GEN_INT (lo_val);
2524
2525 if (TARGET_THUMB1)
2526 return 0;
2527
2528 switch (code)
2529 {
2530 case PLUS:
2531 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
2532
2533 default:
2534 return 0;
2535 }
2536 }
2537
2538 /* Emit a sequence of insns to handle a large constant.
2539 CODE is the code of the operation required, it can be any of SET, PLUS,
2540 IOR, AND, XOR, MINUS;
2541 MODE is the mode in which the operation is being performed;
2542 VAL is the integer to operate on;
2543 SOURCE is the other operand (a register, or a null-pointer for SET);
2544 SUBTARGETS means it is safe to create scratch registers if that will
2545 either produce a simpler sequence, or we will want to cse the values.
2546 Return value is the number of insns emitted. */
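/* As an illustration, on a core without movw/movt a SET of 0x0000ffff
   is not encodable in one instruction and is split into two, e.g.
     mov  rD, #0x0000ff00
     add  rD, rD, #0x000000ff
   (rD is whatever register TARGET denotes).  */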
2547
2548 /* ??? Tweak this for thumb2. */
2549 int
2550 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2551 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2552 {
2553 rtx cond;
2554
2555 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2556 cond = COND_EXEC_TEST (PATTERN (insn));
2557 else
2558 cond = NULL_RTX;
2559
2560 if (subtargets || code == SET
2561 || (REG_P (target) && REG_P (source)
2562 && REGNO (target) != REGNO (source)))
2563 {
2564 /* After arm_reorg has been called, we can't fix up expensive
2565 constants by pushing them into memory so we must synthesize
2566 them in-line, regardless of the cost. This is only likely to
2567 be more costly on chips that have load delay slots and we are
2568 compiling without running the scheduler (so no splitting
2569 occurred before the final instruction emission).
2570
2571 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2572 */
2573 if (!after_arm_reorg
2574 && !cond
2575 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2576 1, 0)
2577 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2578 + (code != SET))))
2579 {
2580 if (code == SET)
2581 {
2582 /* Currently SET is the only monadic value for CODE, all
2583 the rest are dyadic. */
2584 if (TARGET_USE_MOVT)
2585 arm_emit_movpair (target, GEN_INT (val));
2586 else
2587 emit_set_insn (target, GEN_INT (val));
2588
2589 return 1;
2590 }
2591 else
2592 {
2593 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2594
2595 if (TARGET_USE_MOVT)
2596 arm_emit_movpair (temp, GEN_INT (val));
2597 else
2598 emit_set_insn (temp, GEN_INT (val));
2599
2600 /* For MINUS, the value is subtracted from, since we never
2601 have subtraction of a constant. */
2602 if (code == MINUS)
2603 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2604 else
2605 emit_set_insn (target,
2606 gen_rtx_fmt_ee (code, mode, source, temp));
2607 return 2;
2608 }
2609 }
2610 }
2611
2612 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2613 1);
2614 }
2615
2616 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
2617 ARM/THUMB2 immediates, and add up to VAL.
2618 The function return value gives the number of insns required. */
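/* For example, VAL == 0x0ff00ff0 is covered by the two rotated 8-bit
   immediates 0x0ff00000 and 0x00000ff0, so the sequence has length 2.  */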
2619 static int
2620 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2621 struct four_ints *return_sequence)
2622 {
2623 int best_consecutive_zeros = 0;
2624 int i;
2625 int best_start = 0;
2626 int insns1, insns2;
2627 struct four_ints tmp_sequence;
2628
2629 /* If we aren't targeting ARM, the best place to start is always at
2630 the bottom, otherwise look more closely. */
2631 if (TARGET_ARM)
2632 {
2633 for (i = 0; i < 32; i += 2)
2634 {
2635 int consecutive_zeros = 0;
2636
2637 if (!(val & (3 << i)))
2638 {
2639 while ((i < 32) && !(val & (3 << i)))
2640 {
2641 consecutive_zeros += 2;
2642 i += 2;
2643 }
2644 if (consecutive_zeros > best_consecutive_zeros)
2645 {
2646 best_consecutive_zeros = consecutive_zeros;
2647 best_start = i - consecutive_zeros;
2648 }
2649 i -= 2;
2650 }
2651 }
2652 }
2653
2654 /* So long as it won't require any more insns to do so, it's
2655 desirable to emit a small constant (in bits 0...9) in the last
2656 insn. This way there is more chance that it can be combined with
2657 a later addressing insn to form a pre-indexed load or store
2658 operation. Consider:
2659
2660 *((volatile int *)0xe0000100) = 1;
2661 *((volatile int *)0xe0000110) = 2;
2662
2663 We want this to wind up as:
2664
2665 mov rA, #0xe0000000
2666 mov rB, #1
2667 str rB, [rA, #0x100]
2668 mov rB, #2
2669 str rB, [rA, #0x110]
2670
2671 rather than having to synthesize both large constants from scratch.
2672
2673 Therefore, we calculate how many insns would be required to emit
2674 the constant starting from `best_start', and also starting from
2675 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2676 yield a shorter sequence, we may as well use zero. */
2677 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
2678 if (best_start != 0
2679 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
2680 {
2681 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
2682 if (insns2 <= insns1)
2683 {
2684 *return_sequence = tmp_sequence;
2685 insns1 = insns2;
2686 }
2687 }
2688
2689 return insns1;
2690 }
2691
2692 /* As for optimal_immediate_sequence, but starting at bit-position I. */
2693 static int
2694 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2695 struct four_ints *return_sequence, int i)
2696 {
2697 int remainder = val & 0xffffffff;
2698 int insns = 0;
2699
2700 /* Try and find a way of doing the job in either two or three
2701 instructions.
2702
2703 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2704 location. We start at position I. This may be the MSB, or
2705 optimal_immediate_sequence may have positioned it at the largest block
2706 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2707 wrapping around to the top of the word when we drop off the bottom.
2708 In the worst case this code should produce no more than four insns.
2709
2710 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
2711 constants, shifted to any arbitrary location. We should always start
2712 at the MSB. */
2713 do
2714 {
2715 int end;
2716 unsigned int b1, b2, b3, b4;
2717 unsigned HOST_WIDE_INT result;
2718 int loc;
2719
2720 gcc_assert (insns < 4);
2721
2722 if (i <= 0)
2723 i += 32;
2724
2725 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
2726 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2727 {
2728 loc = i;
2729 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2730 /* We can use addw/subw for the last 12 bits. */
2731 result = remainder;
2732 else
2733 {
2734 /* Use an 8-bit shifted/rotated immediate. */
2735 end = i - 8;
2736 if (end < 0)
2737 end += 32;
2738 result = remainder & ((0x0ff << end)
2739 | ((i < end) ? (0xff >> (32 - end))
2740 : 0));
2741 i -= 8;
2742 }
2743 }
2744 else
2745 {
2746 /* Arm allows rotates by a multiple of two. Thumb-2 allows
2747 arbitrary shifts. */
2748 i -= TARGET_ARM ? 2 : 1;
2749 continue;
2750 }
2751
2752 /* Next, see if we can do a better job with a thumb2 replicated
2753 constant.
2754
2755 We do it this way around to catch the cases like 0x01F001E0 where
2756 two 8-bit immediates would work, but a replicated constant would
2757 make it worse.
2758
2759 TODO: 16-bit constants that don't clear all the bits, but still win.
2760 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
2761 if (TARGET_THUMB2)
2762 {
2763 b1 = (remainder & 0xff000000) >> 24;
2764 b2 = (remainder & 0x00ff0000) >> 16;
2765 b3 = (remainder & 0x0000ff00) >> 8;
2766 b4 = remainder & 0xff;
2767
2768 if (loc > 24)
2769 {
2770 /* The 8-bit immediate already found clears b1 (and maybe b2),
2771 but must leave b3 and b4 alone. */
2772
2773 /* First try to find a 32-bit replicated constant that clears
2774 almost everything. We can assume that we can't do it in one,
2775 or else we wouldn't be here. */
2776 unsigned int tmp = b1 & b2 & b3 & b4;
2777 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2778 + (tmp << 24);
2779 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2780 + (tmp == b3) + (tmp == b4);
2781 if (tmp
2782 && (matching_bytes >= 3
2783 || (matching_bytes == 2
2784 && const_ok_for_op (remainder & ~tmp2, code))))
2785 {
2786 /* At least 3 of the bytes match, and the fourth has at
2787 least as many bits set, or two of the bytes match
2788 and it will only require one more insn to finish. */
2789 result = tmp2;
2790 i = tmp != b1 ? 32
2791 : tmp != b2 ? 24
2792 : tmp != b3 ? 16
2793 : 8;
2794 }
2795
2796 /* Second, try to find a 16-bit replicated constant that can
2797 leave three of the bytes clear. If b2 or b4 is already
2798 zero, then we can. If the 8-bit from above would not
2799 clear b2 anyway, then we still win. */
2800 else if (b1 == b3 && (!b2 || !b4
2801 || (remainder & 0x00ff0000 & ~result)))
2802 {
2803 result = remainder & 0xff00ff00;
2804 i = 24;
2805 }
2806 }
2807 else if (loc > 16)
2808 {
2809 /* The 8-bit immediate already found clears b2 (and maybe b3)
2810 and we don't get here unless b1 is already clear, but it will
2811 leave b4 unchanged. */
2812
2813 /* If we can clear b2 and b4 at once, then we win, since the
2814 8-bits couldn't possibly reach that far. */
2815 if (b2 == b4)
2816 {
2817 result = remainder & 0x00ff00ff;
2818 i = 16;
2819 }
2820 }
2821 }
2822
2823 return_sequence->i[insns++] = result;
2824 remainder &= ~result;
2825
2826 if (code == SET || code == MINUS)
2827 code = PLUS;
2828 }
2829 while (remainder);
2830
2831 return insns;
2832 }
2833
2834 /* Emit an instruction with the indicated PATTERN. If COND is
2835 non-NULL, conditionalize the execution of the instruction on COND
2836 being true. */
2837
2838 static void
2839 emit_constant_insn (rtx cond, rtx pattern)
2840 {
2841 if (cond)
2842 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2843 emit_insn (pattern);
2844 }
2845
2846 /* As above, but extra parameter GENERATE which, if clear, suppresses
2847 RTL generation. */
2848
2849 static int
2850 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2851 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2852 int generate)
2853 {
2854 int can_invert = 0;
2855 int can_negate = 0;
2856 int final_invert = 0;
2857 int i;
2858 int set_sign_bit_copies = 0;
2859 int clear_sign_bit_copies = 0;
2860 int clear_zero_bit_copies = 0;
2861 int set_zero_bit_copies = 0;
2862 int insns = 0, neg_insns, inv_insns;
2863 unsigned HOST_WIDE_INT temp1, temp2;
2864 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2865 struct four_ints *immediates;
2866 struct four_ints pos_immediates, neg_immediates, inv_immediates;
2867
2868 /* Find out which operations are safe for a given CODE. Also do a quick
2869 check for degenerate cases; these can occur when DImode operations
2870 are split. */
2871 switch (code)
2872 {
2873 case SET:
2874 can_invert = 1;
2875 break;
2876
2877 case PLUS:
2878 can_negate = 1;
2879 break;
2880
2881 case IOR:
2882 if (remainder == 0xffffffff)
2883 {
2884 if (generate)
2885 emit_constant_insn (cond,
2886 gen_rtx_SET (VOIDmode, target,
2887 GEN_INT (ARM_SIGN_EXTEND (val))));
2888 return 1;
2889 }
2890
2891 if (remainder == 0)
2892 {
2893 if (reload_completed && rtx_equal_p (target, source))
2894 return 0;
2895
2896 if (generate)
2897 emit_constant_insn (cond,
2898 gen_rtx_SET (VOIDmode, target, source));
2899 return 1;
2900 }
2901 break;
2902
2903 case AND:
2904 if (remainder == 0)
2905 {
2906 if (generate)
2907 emit_constant_insn (cond,
2908 gen_rtx_SET (VOIDmode, target, const0_rtx));
2909 return 1;
2910 }
2911 if (remainder == 0xffffffff)
2912 {
2913 if (reload_completed && rtx_equal_p (target, source))
2914 return 0;
2915 if (generate)
2916 emit_constant_insn (cond,
2917 gen_rtx_SET (VOIDmode, target, source));
2918 return 1;
2919 }
2920 can_invert = 1;
2921 break;
2922
2923 case XOR:
2924 if (remainder == 0)
2925 {
2926 if (reload_completed && rtx_equal_p (target, source))
2927 return 0;
2928 if (generate)
2929 emit_constant_insn (cond,
2930 gen_rtx_SET (VOIDmode, target, source));
2931 return 1;
2932 }
2933
2934 if (remainder == 0xffffffff)
2935 {
2936 if (generate)
2937 emit_constant_insn (cond,
2938 gen_rtx_SET (VOIDmode, target,
2939 gen_rtx_NOT (mode, source)));
2940 return 1;
2941 }
2942 final_invert = 1;
2943 break;
2944
2945 case MINUS:
2946 /* We treat MINUS as (val - source), since (source - val) is always
2947 passed as (source + (-val)). */
2948 if (remainder == 0)
2949 {
2950 if (generate)
2951 emit_constant_insn (cond,
2952 gen_rtx_SET (VOIDmode, target,
2953 gen_rtx_NEG (mode, source)));
2954 return 1;
2955 }
2956 if (const_ok_for_arm (val))
2957 {
2958 if (generate)
2959 emit_constant_insn (cond,
2960 gen_rtx_SET (VOIDmode, target,
2961 gen_rtx_MINUS (mode, GEN_INT (val),
2962 source)));
2963 return 1;
2964 }
2965
2966 break;
2967
2968 default:
2969 gcc_unreachable ();
2970 }
2971
2972 /* If we can do it in one insn get out quickly. */
2973 if (const_ok_for_op (val, code))
2974 {
2975 if (generate)
2976 emit_constant_insn (cond,
2977 gen_rtx_SET (VOIDmode, target,
2978 (source
2979 ? gen_rtx_fmt_ee (code, mode, source,
2980 GEN_INT (val))
2981 : GEN_INT (val))));
2982 return 1;
2983 }
2984
2985 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
2986 insn. */
2987 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
2988 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
2989 {
2990 if (generate)
2991 {
2992 if (mode == SImode && i == 16)
2993 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
2994 smaller insn. */
2995 emit_constant_insn (cond,
2996 gen_zero_extendhisi2
2997 (target, gen_lowpart (HImode, source)));
2998 else
2999 /* extzv only supports SImode, but we can coerce the operands
3000 into that mode. */
3001 emit_constant_insn (cond,
3002 gen_extzv_t2 (gen_lowpart (SImode, target),
3003 gen_lowpart (SImode, source),
3004 GEN_INT (i), const0_rtx));
3005 }
3006
3007 return 1;
3008 }
3009
3010 /* Calculate a few attributes that may be useful for specific
3011 optimizations. */
3012 /* Count number of leading zeros. */
3013 for (i = 31; i >= 0; i--)
3014 {
3015 if ((remainder & (1 << i)) == 0)
3016 clear_sign_bit_copies++;
3017 else
3018 break;
3019 }
3020
3021 /* Count number of leading 1's. */
3022 for (i = 31; i >= 0; i--)
3023 {
3024 if ((remainder & (1 << i)) != 0)
3025 set_sign_bit_copies++;
3026 else
3027 break;
3028 }
3029
3030 /* Count number of trailing zero's. */
3031 for (i = 0; i <= 31; i++)
3032 {
3033 if ((remainder & (1 << i)) == 0)
3034 clear_zero_bit_copies++;
3035 else
3036 break;
3037 }
3038
3039 /* Count number of trailing 1's. */
3040 for (i = 0; i <= 31; i++)
3041 {
3042 if ((remainder & (1 << i)) != 0)
3043 set_zero_bit_copies++;
3044 else
3045 break;
3046 }
3047
3048 switch (code)
3049 {
3050 case SET:
3051 /* See if we can do this by sign_extending a constant that is known
3052 to be negative. This is a good way of doing it, since the shift
3053 may well merge into a subsequent insn. */
3054 if (set_sign_bit_copies > 1)
3055 {
3056 if (const_ok_for_arm
3057 (temp1 = ARM_SIGN_EXTEND (remainder
3058 << (set_sign_bit_copies - 1))))
3059 {
3060 if (generate)
3061 {
3062 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3063 emit_constant_insn (cond,
3064 gen_rtx_SET (VOIDmode, new_src,
3065 GEN_INT (temp1)));
3066 emit_constant_insn (cond,
3067 gen_ashrsi3 (target, new_src,
3068 GEN_INT (set_sign_bit_copies - 1)));
3069 }
3070 return 2;
3071 }
3072 /* For an inverted constant, we will need to set the low bits,
3073 these will be shifted out of harm's way. */
3074 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3075 if (const_ok_for_arm (~temp1))
3076 {
3077 if (generate)
3078 {
3079 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3080 emit_constant_insn (cond,
3081 gen_rtx_SET (VOIDmode, new_src,
3082 GEN_INT (temp1)));
3083 emit_constant_insn (cond,
3084 gen_ashrsi3 (target, new_src,
3085 GEN_INT (set_sign_bit_copies - 1)));
3086 }
3087 return 2;
3088 }
3089 }
3090
3091 /* See if we can calculate the value as the difference between two
3092 valid immediates. */
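/* For example, 0x000fffff is not itself a valid immediate, but it
   equals 0x00100000 - 1 and can therefore be built as
     mov  rD, #0x00100000
     sub  rD, rD, #1
   (rD being the destination register chosen below).  */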
3093 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3094 {
3095 int topshift = clear_sign_bit_copies & ~1;
3096
3097 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3098 & (0xff000000 >> topshift));
3099
3100 /* If temp1 is zero, then that means the 9 most significant
3101 bits of remainder were 1 and we've caused it to overflow.
3102 When topshift is 0 we don't need to do anything since we
3103 can borrow from 'bit 32'. */
3104 if (temp1 == 0 && topshift != 0)
3105 temp1 = 0x80000000 >> (topshift - 1);
3106
3107 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3108
3109 if (const_ok_for_arm (temp2))
3110 {
3111 if (generate)
3112 {
3113 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3114 emit_constant_insn (cond,
3115 gen_rtx_SET (VOIDmode, new_src,
3116 GEN_INT (temp1)));
3117 emit_constant_insn (cond,
3118 gen_addsi3 (target, new_src,
3119 GEN_INT (-temp2)));
3120 }
3121
3122 return 2;
3123 }
3124 }
3125
3126 /* See if we can generate this by setting the bottom (or the top)
3127 16 bits, and then shifting these into the other half of the
3128 word. We only look for the simplest cases, to do more would cost
3129 too much. Be careful, however, not to generate this when the
3130 alternative would take fewer insns. */
3131 if (val & 0xffff0000)
3132 {
3133 temp1 = remainder & 0xffff0000;
3134 temp2 = remainder & 0x0000ffff;
3135
3136 /* Overlaps outside this range are best done using other methods. */
3137 for (i = 9; i < 24; i++)
3138 {
3139 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3140 && !const_ok_for_arm (temp2))
3141 {
3142 rtx new_src = (subtargets
3143 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3144 : target);
3145 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3146 source, subtargets, generate);
3147 source = new_src;
3148 if (generate)
3149 emit_constant_insn
3150 (cond,
3151 gen_rtx_SET
3152 (VOIDmode, target,
3153 gen_rtx_IOR (mode,
3154 gen_rtx_ASHIFT (mode, source,
3155 GEN_INT (i)),
3156 source)));
3157 return insns + 1;
3158 }
3159 }
3160
3161 /* Don't duplicate cases already considered. */
3162 for (i = 17; i < 24; i++)
3163 {
3164 if (((temp1 | (temp1 >> i)) == remainder)
3165 && !const_ok_for_arm (temp1))
3166 {
3167 rtx new_src = (subtargets
3168 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3169 : target);
3170 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3171 source, subtargets, generate);
3172 source = new_src;
3173 if (generate)
3174 emit_constant_insn
3175 (cond,
3176 gen_rtx_SET (VOIDmode, target,
3177 gen_rtx_IOR
3178 (mode,
3179 gen_rtx_LSHIFTRT (mode, source,
3180 GEN_INT (i)),
3181 source)));
3182 return insns + 1;
3183 }
3184 }
3185 }
3186 break;
3187
3188 case IOR:
3189 case XOR:
3190 /* If we have IOR or XOR, and the constant can be loaded in a
3191 single instruction, and we can find a temporary to put it in,
3192 then this can be done in two instructions instead of 3-4. */
3193 if (subtargets
3194 /* TARGET can't be NULL if SUBTARGETS is 0 */
3195 || (reload_completed && !reg_mentioned_p (target, source)))
3196 {
3197 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3198 {
3199 if (generate)
3200 {
3201 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3202
3203 emit_constant_insn (cond,
3204 gen_rtx_SET (VOIDmode, sub,
3205 GEN_INT (val)));
3206 emit_constant_insn (cond,
3207 gen_rtx_SET (VOIDmode, target,
3208 gen_rtx_fmt_ee (code, mode,
3209 source, sub)));
3210 }
3211 return 2;
3212 }
3213 }
3214
3215 if (code == XOR)
3216 break;
3217
3218 /* Convert.
3219 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
3220 and the remainder 0s for e.g. 0xfff00000)
3221 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3222
3223 This can be done in 2 instructions by using shifts with mov or mvn.
3224 e.g. for
3225 x = x | 0xfff00000;
3226 we generate.
3227 mvn r0, r0, asl #12
3228 mvn r0, r0, lsr #12 */
3229 if (set_sign_bit_copies > 8
3230 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3231 {
3232 if (generate)
3233 {
3234 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3235 rtx shift = GEN_INT (set_sign_bit_copies);
3236
3237 emit_constant_insn
3238 (cond,
3239 gen_rtx_SET (VOIDmode, sub,
3240 gen_rtx_NOT (mode,
3241 gen_rtx_ASHIFT (mode,
3242 source,
3243 shift))));
3244 emit_constant_insn
3245 (cond,
3246 gen_rtx_SET (VOIDmode, target,
3247 gen_rtx_NOT (mode,
3248 gen_rtx_LSHIFTRT (mode, sub,
3249 shift))));
3250 }
3251 return 2;
3252 }
3253
3254 /* Convert
3255 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3256 to
3257 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3258
3259 E.g. for r0 = r0 | 0xfff:
3260 mvn r0, r0, lsr #12
3261 mvn r0, r0, asl #12
3262
3263 */
3264 if (set_zero_bit_copies > 8
3265 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3266 {
3267 if (generate)
3268 {
3269 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3270 rtx shift = GEN_INT (set_zero_bit_copies);
3271
3272 emit_constant_insn
3273 (cond,
3274 gen_rtx_SET (VOIDmode, sub,
3275 gen_rtx_NOT (mode,
3276 gen_rtx_LSHIFTRT (mode,
3277 source,
3278 shift))));
3279 emit_constant_insn
3280 (cond,
3281 gen_rtx_SET (VOIDmode, target,
3282 gen_rtx_NOT (mode,
3283 gen_rtx_ASHIFT (mode, sub,
3284 shift))));
3285 }
3286 return 2;
3287 }
3288
3289 /* This will never be reached for Thumb2 because orn is a valid
3290 instruction. This is for Thumb1 and the ARM 32 bit cases.
3291
3292 x = y | constant (such that ~constant is a valid constant)
3293 Transform this to
3294 x = ~(~y & ~constant).
3295 */
3296 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3297 {
3298 if (generate)
3299 {
3300 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3301 emit_constant_insn (cond,
3302 gen_rtx_SET (VOIDmode, sub,
3303 gen_rtx_NOT (mode, source)));
3304 source = sub;
3305 if (subtargets)
3306 sub = gen_reg_rtx (mode);
3307 emit_constant_insn (cond,
3308 gen_rtx_SET (VOIDmode, sub,
3309 gen_rtx_AND (mode, source,
3310 GEN_INT (temp1))));
3311 emit_constant_insn (cond,
3312 gen_rtx_SET (VOIDmode, target,
3313 gen_rtx_NOT (mode, sub)));
3314 }
3315 return 3;
3316 }
3317 break;
3318
3319 case AND:
3320 /* See if two shifts will do 2 or more insns' worth of work. */
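/* For example, on cores without uxth/ubfx, x & 0x0000ffff is emitted as
     mov  rT, rS, lsl #16
     mov  rD, rT, lsr #16
   (rS source, rT scratch, rD destination; purely illustrative).  */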
3321 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3322 {
3323 HOST_WIDE_INT shift_mask = ((0xffffffff
3324 << (32 - clear_sign_bit_copies))
3325 & 0xffffffff);
3326
3327 if ((remainder | shift_mask) != 0xffffffff)
3328 {
3329 if (generate)
3330 {
3331 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3332 insns = arm_gen_constant (AND, mode, cond,
3333 remainder | shift_mask,
3334 new_src, source, subtargets, 1);
3335 source = new_src;
3336 }
3337 else
3338 {
3339 rtx targ = subtargets ? NULL_RTX : target;
3340 insns = arm_gen_constant (AND, mode, cond,
3341 remainder | shift_mask,
3342 targ, source, subtargets, 0);
3343 }
3344 }
3345
3346 if (generate)
3347 {
3348 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3349 rtx shift = GEN_INT (clear_sign_bit_copies);
3350
3351 emit_insn (gen_ashlsi3 (new_src, source, shift));
3352 emit_insn (gen_lshrsi3 (target, new_src, shift));
3353 }
3354
3355 return insns + 2;
3356 }
3357
3358 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3359 {
3360 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3361
3362 if ((remainder | shift_mask) != 0xffffffff)
3363 {
3364 if (generate)
3365 {
3366 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3367
3368 insns = arm_gen_constant (AND, mode, cond,
3369 remainder | shift_mask,
3370 new_src, source, subtargets, 1);
3371 source = new_src;
3372 }
3373 else
3374 {
3375 rtx targ = subtargets ? NULL_RTX : target;
3376
3377 insns = arm_gen_constant (AND, mode, cond,
3378 remainder | shift_mask,
3379 targ, source, subtargets, 0);
3380 }
3381 }
3382
3383 if (generate)
3384 {
3385 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3386 rtx shift = GEN_INT (clear_zero_bit_copies);
3387
3388 emit_insn (gen_lshrsi3 (new_src, source, shift));
3389 emit_insn (gen_ashlsi3 (target, new_src, shift));
3390 }
3391
3392 return insns + 2;
3393 }
3394
3395 break;
3396
3397 default:
3398 break;
3399 }
3400
3401 /* Calculate what the instruction sequences would be if we generated the constant
3402 normally, negated, or inverted. */
3403 if (code == AND)
3404 /* AND cannot be split into multiple insns, so invert and use BIC. */
3405 insns = 99;
3406 else
3407 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3408
3409 if (can_negate)
3410 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3411 &neg_immediates);
3412 else
3413 neg_insns = 99;
3414
3415 if (can_invert || final_invert)
3416 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3417 &inv_immediates);
3418 else
3419 inv_insns = 99;
3420
3421 immediates = &pos_immediates;
3422
3423 /* Is the negated immediate sequence more efficient? */
3424 if (neg_insns < insns && neg_insns <= inv_insns)
3425 {
3426 insns = neg_insns;
3427 immediates = &neg_immediates;
3428 }
3429 else
3430 can_negate = 0;
3431
3432 /* Is the inverted immediate sequence more efficient?
3433 We must allow for an extra NOT instruction for XOR operations, although
3434 there is some chance that the final 'mvn' will get optimized later. */
3435 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3436 {
3437 insns = inv_insns;
3438 immediates = &inv_immediates;
3439 }
3440 else
3441 {
3442 can_invert = 0;
3443 final_invert = 0;
3444 }
3445
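/* A worked example of the selection above (an illustrative sketch; the
   register name and assembly output are for explanation only): for a SET
   of 0xffffff00, the positive form needs three 8-bit rotated immediates
   (0xff000000 + 0x00ff0000 + 0x0000ff00, i.e. mov + orr + orr), whereas
   the bitwise inverse 0x000000ff fits a single immediate.  The inverted
   sequence is therefore chosen and the whole constant is loaded with one
       mvn rX, #0x000000ff   @ rX = 0xffffff00  */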
3446 /* Now output the chosen sequence as instructions. */
3447 if (generate)
3448 {
3449 for (i = 0; i < insns; i++)
3450 {
3451 rtx new_src, temp1_rtx;
3452
3453 temp1 = immediates->i[i];
3454
3455 if (code == SET || code == MINUS)
3456 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3457 else if ((final_invert || i < (insns - 1)) && subtargets)
3458 new_src = gen_reg_rtx (mode);
3459 else
3460 new_src = target;
3461
3462 if (can_invert)
3463 temp1 = ~temp1;
3464 else if (can_negate)
3465 temp1 = -temp1;
3466
3467 temp1 = trunc_int_for_mode (temp1, mode);
3468 temp1_rtx = GEN_INT (temp1);
3469
3470 if (code == SET)
3471 ;
3472 else if (code == MINUS)
3473 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3474 else
3475 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3476
3477 emit_constant_insn (cond,
3478 gen_rtx_SET (VOIDmode, new_src,
3479 temp1_rtx));
3480 source = new_src;
3481
3482 if (code == SET)
3483 {
3484 can_negate = can_invert;
3485 can_invert = 0;
3486 code = PLUS;
3487 }
3488 else if (code == MINUS)
3489 code = PLUS;
3490 }
3491 }
3492
3493 if (final_invert)
3494 {
3495 if (generate)
3496 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3497 gen_rtx_NOT (mode, source)));
3498 insns++;
3499 }
3500
3501 return insns;
3502 }
3503
3504 /* Canonicalize a comparison so that we are more likely to recognize it.
3505 This can be done for a few constant compares, where we can make the
3506 immediate value easier to load. */
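/* For example (an illustrative sketch): 0xfff is not a valid ARM
   immediate, and neither is -0xfff, but 0x1000 is; so an SImode
   comparison such as (x > 0xfff) is rewritten below as (x >= 0x1000),
   which needs only a single cmp instruction.  */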
3507
3508 enum rtx_code
3509 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3510 {
3511 enum machine_mode mode;
3512 unsigned HOST_WIDE_INT i, maxval;
3513
3514 mode = GET_MODE (*op0);
3515 if (mode == VOIDmode)
3516 mode = GET_MODE (*op1);
3517
3518 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3519
3520 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3521 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3522 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3523 for GTU/LEU in Thumb mode. */
3524 if (mode == DImode)
3525 {
3526 rtx tem;
3527
3528 if (code == GT || code == LE
3529 || (!TARGET_ARM && (code == GTU || code == LEU)))
3530 {
3531 /* Missing comparison. First try to use an available
3532 comparison. */
3533 if (CONST_INT_P (*op1))
3534 {
3535 i = INTVAL (*op1);
3536 switch (code)
3537 {
3538 case GT:
3539 case LE:
3540 if (i != maxval
3541 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3542 {
3543 *op1 = GEN_INT (i + 1);
3544 return code == GT ? GE : LT;
3545 }
3546 break;
3547 case GTU:
3548 case LEU:
3549 if (i != ~((unsigned HOST_WIDE_INT) 0)
3550 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3551 {
3552 *op1 = GEN_INT (i + 1);
3553 return code == GTU ? GEU : LTU;
3554 }
3555 break;
3556 default:
3557 gcc_unreachable ();
3558 }
3559 }
3560
3561 /* If that did not work, reverse the condition. */
3562 tem = *op0;
3563 *op0 = *op1;
3564 *op1 = tem;
3565 return swap_condition (code);
3566 }
3567
3568 return code;
3569 }
3570
3571 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3572 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3573 to facilitate possible combining with a cmp into 'ands'. */
3574 if (mode == SImode
3575 && GET_CODE (*op0) == ZERO_EXTEND
3576 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3577 && GET_MODE (XEXP (*op0, 0)) == QImode
3578 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3579 && subreg_lowpart_p (XEXP (*op0, 0))
3580 && *op1 == const0_rtx)
3581 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
3582 GEN_INT (255));
3583
3584 /* Comparisons smaller than DImode. Only adjust comparisons against
3585 an out-of-range constant. */
3586 if (!CONST_INT_P (*op1)
3587 || const_ok_for_arm (INTVAL (*op1))
3588 || const_ok_for_arm (- INTVAL (*op1)))
3589 return code;
3590
3591 i = INTVAL (*op1);
3592
3593 switch (code)
3594 {
3595 case EQ:
3596 case NE:
3597 return code;
3598
3599 case GT:
3600 case LE:
3601 if (i != maxval
3602 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3603 {
3604 *op1 = GEN_INT (i + 1);
3605 return code == GT ? GE : LT;
3606 }
3607 break;
3608
3609 case GE:
3610 case LT:
3611 if (i != ~maxval
3612 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3613 {
3614 *op1 = GEN_INT (i - 1);
3615 return code == GE ? GT : LE;
3616 }
3617 break;
3618
3619 case GTU:
3620 case LEU:
3621 if (i != ~((unsigned HOST_WIDE_INT) 0)
3622 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3623 {
3624 *op1 = GEN_INT (i + 1);
3625 return code == GTU ? GEU : LTU;
3626 }
3627 break;
3628
3629 case GEU:
3630 case LTU:
3631 if (i != 0
3632 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3633 {
3634 *op1 = GEN_INT (i - 1);
3635 return code == GEU ? GTU : LEU;
3636 }
3637 break;
3638
3639 default:
3640 gcc_unreachable ();
3641 }
3642
3643 return code;
3644 }
3645
3646
3647 /* Define how to find the value returned by a function. */
3648
3649 static rtx
3650 arm_function_value(const_tree type, const_tree func,
3651 bool outgoing ATTRIBUTE_UNUSED)
3652 {
3653 enum machine_mode mode;
3654 int unsignedp ATTRIBUTE_UNUSED;
3655 rtx r ATTRIBUTE_UNUSED;
3656
3657 mode = TYPE_MODE (type);
3658
3659 if (TARGET_AAPCS_BASED)
3660 return aapcs_allocate_return_reg (mode, type, func);
3661
3662 /* Promote integer types. */
3663 if (INTEGRAL_TYPE_P (type))
3664 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3665
3666 /* Promotes small structs returned in a register to full-word size
3667 for big-endian AAPCS. */
3668 if (arm_return_in_msb (type))
3669 {
3670 HOST_WIDE_INT size = int_size_in_bytes (type);
3671 if (size % UNITS_PER_WORD != 0)
3672 {
3673 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3674 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3675 }
3676 }
3677
3678 return arm_libcall_value_1 (mode);
3679 }
3680
3681 static int
3682 libcall_eq (const void *p1, const void *p2)
3683 {
3684 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3685 }
3686
3687 static hashval_t
3688 libcall_hash (const void *p1)
3689 {
3690 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3691 }
3692
3693 static void
3694 add_libcall (htab_t htab, rtx libcall)
3695 {
3696 *htab_find_slot (htab, libcall, INSERT) = libcall;
3697 }
3698
3699 static bool
3700 arm_libcall_uses_aapcs_base (const_rtx libcall)
3701 {
3702 static bool init_done = false;
3703 static htab_t libcall_htab;
3704
3705 if (!init_done)
3706 {
3707 init_done = true;
3708
3709 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3710 NULL);
3711 add_libcall (libcall_htab,
3712 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3713 add_libcall (libcall_htab,
3714 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3715 add_libcall (libcall_htab,
3716 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3717 add_libcall (libcall_htab,
3718 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3719
3720 add_libcall (libcall_htab,
3721 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3722 add_libcall (libcall_htab,
3723 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3724 add_libcall (libcall_htab,
3725 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3726 add_libcall (libcall_htab,
3727 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3728
3729 add_libcall (libcall_htab,
3730 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3731 add_libcall (libcall_htab,
3732 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3733 add_libcall (libcall_htab,
3734 convert_optab_libfunc (sfix_optab, SImode, DFmode));
3735 add_libcall (libcall_htab,
3736 convert_optab_libfunc (ufix_optab, SImode, DFmode));
3737 add_libcall (libcall_htab,
3738 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3739 add_libcall (libcall_htab,
3740 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3741 add_libcall (libcall_htab,
3742 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3743 add_libcall (libcall_htab,
3744 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3745
3746 /* Values from double-precision helper functions are returned in core
3747 registers if the selected core only supports single-precision
3748 arithmetic, even if we are using the hard-float ABI. The same is
3749 true for single-precision helpers, but we will never be using the
3750 hard-float ABI on a CPU which doesn't support single-precision
3751 operations in hardware. */
3752 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3753 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3754 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3755 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3756 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3757 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3758 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3759 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3760 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3761 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3762 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3763 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3764 SFmode));
3765 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3766 DFmode));
3767 }
3768
3769 return libcall && htab_find (libcall_htab, libcall) != NULL;
3770 }
3771
3772 static rtx
3773 arm_libcall_value_1 (enum machine_mode mode)
3774 {
3775 if (TARGET_AAPCS_BASED)
3776 return aapcs_libcall_value (mode);
3777 else if (TARGET_IWMMXT_ABI
3778 && arm_vector_mode_supported_p (mode))
3779 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
3780 else
3781 return gen_rtx_REG (mode, ARG_REGISTER (1));
3782 }
3783
3784 /* Define how to find the value returned by a library function
3785 assuming the value has mode MODE. */
3786
3787 static rtx
3788 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3789 {
3790 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3791 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3792 {
3793 /* The following libcalls return their result in integer registers,
3794 even though they return a floating point value. */
3795 if (arm_libcall_uses_aapcs_base (libcall))
3796 return gen_rtx_REG (mode, ARG_REGISTER(1));
3797
3798 }
3799
3800 return arm_libcall_value_1 (mode);
3801 }
3802
3803 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
3804
3805 static bool
3806 arm_function_value_regno_p (const unsigned int regno)
3807 {
3808 if (regno == ARG_REGISTER (1)
3809 || (TARGET_32BIT
3810 && TARGET_AAPCS_BASED
3811 && TARGET_VFP
3812 && TARGET_HARD_FLOAT
3813 && regno == FIRST_VFP_REGNUM)
3814 || (TARGET_IWMMXT_ABI
3815 && regno == FIRST_IWMMXT_REGNUM))
3816 return true;
3817
3818 return false;
3819 }
3820
3821 /* Determine the amount of memory needed to store the possible return
3822 registers of an untyped call. */
3823 int
3824 arm_apply_result_size (void)
3825 {
3826 int size = 16;
3827
3828 if (TARGET_32BIT)
3829 {
3830 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
3831 size += 32;
3832 if (TARGET_IWMMXT_ABI)
3833 size += 8;
3834 }
3835
3836 return size;
3837 }
3838
3839 /* Decide whether TYPE should be returned in memory (true)
3840 or in a register (false). FNTYPE is the type of the function making
3841 the call. */
3842 static bool
3843 arm_return_in_memory (const_tree type, const_tree fntype)
3844 {
3845 HOST_WIDE_INT size;
3846
3847 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3848
3849 if (TARGET_AAPCS_BASED)
3850 {
3851 /* Simple, non-aggregate types (i.e. not including vectors and
3852 complex) are always returned in a register (or registers).
3853 We don't care about which register here, so we can short-cut
3854 some of the detail. */
3855 if (!AGGREGATE_TYPE_P (type)
3856 && TREE_CODE (type) != VECTOR_TYPE
3857 && TREE_CODE (type) != COMPLEX_TYPE)
3858 return false;
3859
3860 /* Any return value that is no larger than one word can be
3861 returned in r0. */
3862 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3863 return false;
3864
3865 /* Check any available co-processors to see if they accept the
3866 type as a register candidate (VFP, for example, can return
3867 some aggregates in consecutive registers). These aren't
3868 available if the call is variadic. */
3869 if (aapcs_select_return_coproc (type, fntype) >= 0)
3870 return false;
3871
3872 /* Vector values should be returned using ARM registers, not
3873 memory (unless they're over 16 bytes, which will break since
3874 we only have four call-clobbered registers to play with). */
3875 if (TREE_CODE (type) == VECTOR_TYPE)
3876 return (size < 0 || size > (4 * UNITS_PER_WORD));
3877
3878 /* The rest go in memory. */
3879 return true;
3880 }
3881
3882 if (TREE_CODE (type) == VECTOR_TYPE)
3883 return (size < 0 || size > (4 * UNITS_PER_WORD));
3884
3885 if (!AGGREGATE_TYPE_P (type) &&
3886 (TREE_CODE (type) != VECTOR_TYPE))
3887 /* All simple types are returned in registers. */
3888 return false;
3889
3890 if (arm_abi != ARM_ABI_APCS)
3891 {
3892 /* ATPCS and later return aggregate types in memory only if they are
3893 larger than a word (or are variable size). */
3894 return (size < 0 || size > UNITS_PER_WORD);
3895 }
3896
3897 /* For the arm-wince targets we choose to be compatible with Microsoft's
3898 ARM and Thumb compilers, which always return aggregates in memory. */
3899 #ifndef ARM_WINCE
3900 /* All structures/unions bigger than one word are returned in memory.
3901 Also catch the case where int_size_in_bytes returns -1. In this case
3902 the aggregate is either huge or of variable size, and in either case
3903 we will want to return it via memory and not in a register. */
3904 if (size < 0 || size > UNITS_PER_WORD)
3905 return true;
3906
3907 if (TREE_CODE (type) == RECORD_TYPE)
3908 {
3909 tree field;
3910
3911 /* For a struct the APCS says that we only return in a register
3912 if the type is 'integer like' and every addressable element
3913 has an offset of zero. For practical purposes this means
3914 that the structure can have at most one non bit-field element
3915 and that this element must be the first one in the structure. */
3916
3917 /* Find the first field, ignoring non-FIELD_DECL things which will
3918 have been created by C++. */
3919 for (field = TYPE_FIELDS (type);
3920 field && TREE_CODE (field) != FIELD_DECL;
3921 field = DECL_CHAIN (field))
3922 continue;
3923
3924 if (field == NULL)
3925 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3926
3927 /* Check that the first field is valid for returning in a register. */
3928
3929 /* ... Floats are not allowed */
3930 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3931 return true;
3932
3933 /* ... Aggregates that are not themselves valid for returning in
3934 a register are not allowed. */
3935 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3936 return true;
3937
3938 /* Now check the remaining fields, if any. Only bitfields are allowed,
3939 since they are not addressable. */
3940 for (field = DECL_CHAIN (field);
3941 field;
3942 field = DECL_CHAIN (field))
3943 {
3944 if (TREE_CODE (field) != FIELD_DECL)
3945 continue;
3946
3947 if (!DECL_BIT_FIELD_TYPE (field))
3948 return true;
3949 }
3950
3951 return false;
3952 }
3953
3954 if (TREE_CODE (type) == UNION_TYPE)
3955 {
3956 tree field;
3957
3958 /* Unions can be returned in registers if every element is
3959 integral, or can be returned in an integer register. */
3960 for (field = TYPE_FIELDS (type);
3961 field;
3962 field = DECL_CHAIN (field))
3963 {
3964 if (TREE_CODE (field) != FIELD_DECL)
3965 continue;
3966
3967 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3968 return true;
3969
3970 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3971 return true;
3972 }
3973
3974 return false;
3975 }
3976 #endif /* not ARM_WINCE */
3977
3978 /* Return all other types in memory. */
3979 return true;
3980 }
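/* Two concrete AAPCS examples of the rules above (illustrative only):
   a 4-byte struct { int a; } fits in one word and is returned in r0
   (false), while a 20-byte struct { int a[5]; } has no co-processor
   candidate and is returned in memory, with the caller passing the
   address of the result (true).  */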
3981
3982 const struct pcs_attribute_arg
3983 {
3984 const char *arg;
3985 enum arm_pcs value;
3986 } pcs_attribute_args[] =
3987 {
3988 {"aapcs", ARM_PCS_AAPCS},
3989 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3990 #if 0
3991 /* We could recognize these, but changes would be needed elsewhere
3992 * to implement them. */
3993 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3994 {"atpcs", ARM_PCS_ATPCS},
3995 {"apcs", ARM_PCS_APCS},
3996 #endif
3997 {NULL, ARM_PCS_UNKNOWN}
3998 };
3999
4000 static enum arm_pcs
4001 arm_pcs_from_attribute (tree attr)
4002 {
4003 const struct pcs_attribute_arg *ptr;
4004 const char *arg;
4005
4006 /* Get the value of the argument. */
4007 if (TREE_VALUE (attr) == NULL_TREE
4008 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4009 return ARM_PCS_UNKNOWN;
4010
4011 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4012
4013 /* Check it against the list of known arguments. */
4014 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4015 if (streq (arg, ptr->arg))
4016 return ptr->value;
4017
4018 /* An unrecognized PCS name. */
4019 return ARM_PCS_UNKNOWN;
4020 }
4021
4022 /* Get the PCS variant to use for this call. TYPE is the function's type
4023 specification, DECL is the specific declaration. DECL may be null if
4024 the call could be indirect or if this is a library call. */
4025 static enum arm_pcs
4026 arm_get_pcs_model (const_tree type, const_tree decl)
4027 {
4028 bool user_convention = false;
4029 enum arm_pcs user_pcs = arm_pcs_default;
4030 tree attr;
4031
4032 gcc_assert (type);
4033
4034 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4035 if (attr)
4036 {
4037 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4038 user_convention = true;
4039 }
4040
4041 if (TARGET_AAPCS_BASED)
4042 {
4043 /* Detect varargs functions. These always use the base rules
4044 (no argument is ever a candidate for a co-processor
4045 register). */
4046 bool base_rules = stdarg_p (type);
4047
4048 if (user_convention)
4049 {
4050 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4051 sorry ("non-AAPCS derived PCS variant");
4052 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4053 error ("variadic functions must use the base AAPCS variant");
4054 }
4055
4056 if (base_rules)
4057 return ARM_PCS_AAPCS;
4058 else if (user_convention)
4059 return user_pcs;
4060 else if (decl && flag_unit_at_a_time)
4061 {
4062 /* Local functions never leak outside this compilation unit,
4063 so we are free to use whatever conventions are
4064 appropriate. */
4065 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4066 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4067 if (i && i->local)
4068 return ARM_PCS_AAPCS_LOCAL;
4069 }
4070 }
4071 else if (user_convention && user_pcs != arm_pcs_default)
4072 sorry ("PCS variant");
4073
4074 /* For everything else we use the target's default. */
4075 return arm_pcs_default;
4076 }
4077
4078
4079 static void
4080 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4081 const_tree fntype ATTRIBUTE_UNUSED,
4082 rtx libcall ATTRIBUTE_UNUSED,
4083 const_tree fndecl ATTRIBUTE_UNUSED)
4084 {
4085 /* Record the unallocated VFP registers. */
4086 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4087 pcum->aapcs_vfp_reg_alloc = 0;
4088 }
4089
4090 /* Walk down the type tree of TYPE counting consecutive base elements.
4091 If *MODEP is VOIDmode, then set it to the first valid floating point
4092 type. If a non-floating point type is found, or if a floating point
4093 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4094 otherwise return the count in the sub-tree. */
4095 static int
4096 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4097 {
4098 enum machine_mode mode;
4099 HOST_WIDE_INT size;
4100
4101 switch (TREE_CODE (type))
4102 {
4103 case REAL_TYPE:
4104 mode = TYPE_MODE (type);
4105 if (mode != DFmode && mode != SFmode)
4106 return -1;
4107
4108 if (*modep == VOIDmode)
4109 *modep = mode;
4110
4111 if (*modep == mode)
4112 return 1;
4113
4114 break;
4115
4116 case COMPLEX_TYPE:
4117 mode = TYPE_MODE (TREE_TYPE (type));
4118 if (mode != DFmode && mode != SFmode)
4119 return -1;
4120
4121 if (*modep == VOIDmode)
4122 *modep = mode;
4123
4124 if (*modep == mode)
4125 return 2;
4126
4127 break;
4128
4129 case VECTOR_TYPE:
4130 /* Use V2SImode and V4SImode as representatives of all 64-bit
4131 and 128-bit vector types, whether or not those modes are
4132 supported with the present options. */
4133 size = int_size_in_bytes (type);
4134 switch (size)
4135 {
4136 case 8:
4137 mode = V2SImode;
4138 break;
4139 case 16:
4140 mode = V4SImode;
4141 break;
4142 default:
4143 return -1;
4144 }
4145
4146 if (*modep == VOIDmode)
4147 *modep = mode;
4148
4149 /* Vector modes are considered to be opaque: two vectors are
4150 equivalent for the purposes of being homogeneous aggregates
4151 if they are the same size. */
4152 if (*modep == mode)
4153 return 1;
4154
4155 break;
4156
4157 case ARRAY_TYPE:
4158 {
4159 int count;
4160 tree index = TYPE_DOMAIN (type);
4161
4162 /* Can't handle incomplete types. */
4163 if (!COMPLETE_TYPE_P (type))
4164 return -1;
4165
4166 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4167 if (count == -1
4168 || !index
4169 || !TYPE_MAX_VALUE (index)
4170 || !host_integerp (TYPE_MAX_VALUE (index), 1)
4171 || !TYPE_MIN_VALUE (index)
4172 || !host_integerp (TYPE_MIN_VALUE (index), 1)
4173 || count < 0)
4174 return -1;
4175
4176 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4177 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4178
4179 /* There must be no padding. */
4180 if (!host_integerp (TYPE_SIZE (type), 1)
4181 || (tree_low_cst (TYPE_SIZE (type), 1)
4182 != count * GET_MODE_BITSIZE (*modep)))
4183 return -1;
4184
4185 return count;
4186 }
4187
4188 case RECORD_TYPE:
4189 {
4190 int count = 0;
4191 int sub_count;
4192 tree field;
4193
4194 /* Can't handle incomplete types. */
4195 if (!COMPLETE_TYPE_P (type))
4196 return -1;
4197
4198 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4199 {
4200 if (TREE_CODE (field) != FIELD_DECL)
4201 continue;
4202
4203 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4204 if (sub_count < 0)
4205 return -1;
4206 count += sub_count;
4207 }
4208
4209 /* There must be no padding. */
4210 if (!host_integerp (TYPE_SIZE (type), 1)
4211 || (tree_low_cst (TYPE_SIZE (type), 1)
4212 != count * GET_MODE_BITSIZE (*modep)))
4213 return -1;
4214
4215 return count;
4216 }
4217
4218 case UNION_TYPE:
4219 case QUAL_UNION_TYPE:
4220 {
4221 /* These aren't very interesting except in a degenerate case. */
4222 int count = 0;
4223 int sub_count;
4224 tree field;
4225
4226 /* Can't handle incomplete types. */
4227 if (!COMPLETE_TYPE_P (type))
4228 return -1;
4229
4230 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4231 {
4232 if (TREE_CODE (field) != FIELD_DECL)
4233 continue;
4234
4235 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4236 if (sub_count < 0)
4237 return -1;
4238 count = count > sub_count ? count : sub_count;
4239 }
4240
4241 /* There must be no padding. */
4242 if (!host_integerp (TYPE_SIZE (type), 1)
4243 || (tree_low_cst (TYPE_SIZE (type), 1)
4244 != count * GET_MODE_BITSIZE (*modep)))
4245 return -1;
4246
4247 return count;
4248 }
4249
4250 default:
4251 break;
4252 }
4253
4254 return -1;
4255 }
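/* Illustrative examples of the walk above (a sketch, assuming the usual
   layouts with no unusual padding):

     struct { double x; double y; }   -> 2 elements, *modep == DFmode
     _Complex double                  -> 2 elements, *modep == DFmode
     struct { float v[4]; }           -> 4 elements, *modep == SFmode
     struct { float f; double d; }    -> -1 (mixed element modes)

   The first three are homogeneous floating-point aggregates and so are
   candidates for VFP registers; the last is not.  */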
4256
4257 /* Return true if PCS_VARIANT should use VFP registers. */
4258 static bool
4259 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4260 {
4261 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4262 {
4263 static bool seen_thumb1_vfp = false;
4264
4265 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4266 {
4267 sorry ("Thumb-1 hard-float VFP ABI");
4268 /* sorry() is not immediately fatal, so only display this once. */
4269 seen_thumb1_vfp = true;
4270 }
4271
4272 return true;
4273 }
4274
4275 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4276 return false;
4277
4278 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4279 (TARGET_VFP_DOUBLE || !is_double));
4280 }
4281
4282 /* Return true if an argument whose type is TYPE, or mode is MODE, is
4283 suitable for passing or returning in VFP registers for the PCS
4284 variant selected. If it is, then *BASE_MODE is updated to contain
4285 a machine mode describing each element of the argument's type and
4286 *COUNT to hold the number of such elements. */
4287 static bool
4288 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4289 enum machine_mode mode, const_tree type,
4290 enum machine_mode *base_mode, int *count)
4291 {
4292 enum machine_mode new_mode = VOIDmode;
4293
4294 /* If we have the type information, prefer that to working things
4295 out from the mode. */
4296 if (type)
4297 {
4298 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4299
4300 if (ag_count > 0 && ag_count <= 4)
4301 *count = ag_count;
4302 else
4303 return false;
4304 }
4305 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
4306 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4307 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4308 {
4309 *count = 1;
4310 new_mode = mode;
4311 }
4312 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4313 {
4314 *count = 2;
4315 new_mode = (mode == DCmode ? DFmode : SFmode);
4316 }
4317 else
4318 return false;
4319
4320
4321 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4322 return false;
4323
4324 *base_mode = new_mode;
4325 return true;
4326 }
4327
4328 static bool
4329 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4330 enum machine_mode mode, const_tree type)
4331 {
4332 int count ATTRIBUTE_UNUSED;
4333 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4334
4335 if (!use_vfp_abi (pcs_variant, false))
4336 return false;
4337 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4338 &ag_mode, &count);
4339 }
4340
4341 static bool
4342 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4343 const_tree type)
4344 {
4345 if (!use_vfp_abi (pcum->pcs_variant, false))
4346 return false;
4347
4348 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4349 &pcum->aapcs_vfp_rmode,
4350 &pcum->aapcs_vfp_rcount);
4351 }
4352
4353 static bool
4354 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4355 const_tree type ATTRIBUTE_UNUSED)
4356 {
4357 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4358 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4359 int regno;
4360
4361 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4362 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4363 {
4364 pcum->aapcs_vfp_reg_alloc = mask << regno;
4365 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4366 {
4367 int i;
4368 int rcount = pcum->aapcs_vfp_rcount;
4369 int rshift = shift;
4370 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4371 rtx par;
4372 if (!TARGET_NEON)
4373 {
4374 /* Avoid using unsupported vector modes. */
4375 if (rmode == V2SImode)
4376 rmode = DImode;
4377 else if (rmode == V4SImode)
4378 {
4379 rmode = DImode;
4380 rcount *= 2;
4381 rshift /= 2;
4382 }
4383 }
4384 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4385 for (i = 0; i < rcount; i++)
4386 {
4387 rtx tmp = gen_rtx_REG (rmode,
4388 FIRST_VFP_REGNUM + regno + i * rshift);
4389 tmp = gen_rtx_EXPR_LIST
4390 (VOIDmode, tmp,
4391 GEN_INT (i * GET_MODE_SIZE (rmode)));
4392 XVECEXP (par, 0, i) = tmp;
4393 }
4394
4395 pcum->aapcs_reg = par;
4396 }
4397 else
4398 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4399 return true;
4400 }
4401 return false;
4402 }
4403
4404 static rtx
4405 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
4406 enum machine_mode mode,
4407 const_tree type)
4408 {
4409 if (!use_vfp_abi (pcs_variant, false))
4410 return NULL;
4411
4412 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4413 {
4414 int count;
4415 enum machine_mode ag_mode;
4416 int i;
4417 rtx par;
4418 int shift;
4419
4420 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4421 &ag_mode, &count);
4422
4423 if (!TARGET_NEON)
4424 {
4425 if (ag_mode == V2SImode)
4426 ag_mode = DImode;
4427 else if (ag_mode == V4SImode)
4428 {
4429 ag_mode = DImode;
4430 count *= 2;
4431 }
4432 }
4433 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4434 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4435 for (i = 0; i < count; i++)
4436 {
4437 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4438 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4439 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4440 XVECEXP (par, 0, i) = tmp;
4441 }
4442
4443 return par;
4444 }
4445
4446 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4447 }
4448
4449 static void
4450 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
4451 enum machine_mode mode ATTRIBUTE_UNUSED,
4452 const_tree type ATTRIBUTE_UNUSED)
4453 {
4454 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4455 pcum->aapcs_vfp_reg_alloc = 0;
4456 return;
4457 }
4458
4459 #define AAPCS_CP(X) \
4460 { \
4461 aapcs_ ## X ## _cum_init, \
4462 aapcs_ ## X ## _is_call_candidate, \
4463 aapcs_ ## X ## _allocate, \
4464 aapcs_ ## X ## _is_return_candidate, \
4465 aapcs_ ## X ## _allocate_return_reg, \
4466 aapcs_ ## X ## _advance \
4467 }
4468
4469 /* Table of co-processors that can be used to pass arguments in
4470 registers. Ideally no argument should be a candidate for more than
4471 one co-processor table entry, but the table is processed in order
4472 and stops after the first match. If that entry then fails to put
4473 the argument into a co-processor register, the argument will go on
4474 the stack. */
4475 static struct
4476 {
4477 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4478 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4479
4480 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4481 BLKmode) is a candidate for this co-processor's registers; this
4482 function should ignore any position-dependent state in
4483 CUMULATIVE_ARGS and only use call-type dependent information. */
4484 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4485
4486 /* Return true if the argument does get a co-processor register; it
4487 should set aapcs_reg to an RTX of the register allocated as is
4488 required for a return from FUNCTION_ARG. */
4489 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4490
4491 /* Return true if a result of mode MODE (or type TYPE if MODE is
4492 BLKmode) can be returned in this co-processor's registers. */
4493 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4494
4495 /* Allocate and return an RTX element to hold the return type of a
4496 call; this routine must not fail and will only be called if
4497 is_return_candidate returned true with the same parameters. */
4498 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4499
4500 /* Finish processing this argument and prepare to start processing
4501 the next one. */
4502 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4503 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4504 {
4505 AAPCS_CP(vfp)
4506 };
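/* For reference, the single entry above is the expansion of AAPCS_CP(vfp):

     { aapcs_vfp_cum_init,
       aapcs_vfp_is_call_candidate,
       aapcs_vfp_allocate,
       aapcs_vfp_is_return_candidate,
       aapcs_vfp_allocate_return_reg,
       aapcs_vfp_advance }  */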
4507
4508 #undef AAPCS_CP
4509
4510 static int
4511 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4512 const_tree type)
4513 {
4514 int i;
4515
4516 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4517 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4518 return i;
4519
4520 return -1;
4521 }
4522
4523 static int
4524 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4525 {
4526 /* We aren't passed a decl, so we can't check that a call is local.
4527 However, it isn't clear that that would be a win anyway, since it
4528 might limit some tail-calling opportunities. */
4529 enum arm_pcs pcs_variant;
4530
4531 if (fntype)
4532 {
4533 const_tree fndecl = NULL_TREE;
4534
4535 if (TREE_CODE (fntype) == FUNCTION_DECL)
4536 {
4537 fndecl = fntype;
4538 fntype = TREE_TYPE (fntype);
4539 }
4540
4541 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4542 }
4543 else
4544 pcs_variant = arm_pcs_default;
4545
4546 if (pcs_variant != ARM_PCS_AAPCS)
4547 {
4548 int i;
4549
4550 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4551 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4552 TYPE_MODE (type),
4553 type))
4554 return i;
4555 }
4556 return -1;
4557 }
4558
4559 static rtx
4560 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4561 const_tree fntype)
4562 {
4563 /* We aren't passed a decl, so we can't check that a call is local.
4564 However, it isn't clear that that would be a win anyway, since it
4565 might limit some tail-calling opportunities. */
4566 enum arm_pcs pcs_variant;
4567 int unsignedp ATTRIBUTE_UNUSED;
4568
4569 if (fntype)
4570 {
4571 const_tree fndecl = NULL_TREE;
4572
4573 if (TREE_CODE (fntype) == FUNCTION_DECL)
4574 {
4575 fndecl = fntype;
4576 fntype = TREE_TYPE (fntype);
4577 }
4578
4579 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4580 }
4581 else
4582 pcs_variant = arm_pcs_default;
4583
4584 /* Promote integer types. */
4585 if (type && INTEGRAL_TYPE_P (type))
4586 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4587
4588 if (pcs_variant != ARM_PCS_AAPCS)
4589 {
4590 int i;
4591
4592 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4593 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4594 type))
4595 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4596 mode, type);
4597 }
4598
4599 /* Promotes small structs returned in a register to full-word size
4600 for big-endian AAPCS. */
4601 if (type && arm_return_in_msb (type))
4602 {
4603 HOST_WIDE_INT size = int_size_in_bytes (type);
4604 if (size % UNITS_PER_WORD != 0)
4605 {
4606 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4607 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4608 }
4609 }
4610
4611 return gen_rtx_REG (mode, R0_REGNUM);
4612 }
4613
4614 static rtx
4615 aapcs_libcall_value (enum machine_mode mode)
4616 {
4617 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4618 && GET_MODE_SIZE (mode) <= 4)
4619 mode = SImode;
4620
4621 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4622 }
4623
4624 /* Lay out a function argument using the AAPCS rules. The rule
4625 numbers referred to here are those in the AAPCS. */
4626 static void
4627 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4628 const_tree type, bool named)
4629 {
4630 int nregs, nregs2;
4631 int ncrn;
4632
4633 /* We only need to do this once per argument. */
4634 if (pcum->aapcs_arg_processed)
4635 return;
4636
4637 pcum->aapcs_arg_processed = true;
4638
4639 /* Special case: if named is false then we are handling an incoming
4640 anonymous argument which is on the stack. */
4641 if (!named)
4642 return;
4643
4644 /* Is this a potential co-processor register candidate? */
4645 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4646 {
4647 int slot = aapcs_select_call_coproc (pcum, mode, type);
4648 pcum->aapcs_cprc_slot = slot;
4649
4650 /* We don't have to apply any of the rules from part B of the
4651 preparation phase, these are handled elsewhere in the
4652 compiler. */
4653
4654 if (slot >= 0)
4655 {
4656 /* A Co-processor register candidate goes either in its own
4657 class of registers or on the stack. */
4658 if (!pcum->aapcs_cprc_failed[slot])
4659 {
4660 /* C1.cp - Try to allocate the argument to co-processor
4661 registers. */
4662 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4663 return;
4664
4665 /* C2.cp - Put the argument on the stack and note that we
4666 can't assign any more candidates in this slot. We also
4667 need to note that we have allocated stack space, so that
4668 we won't later try to split a non-cprc candidate between
4669 core registers and the stack. */
4670 pcum->aapcs_cprc_failed[slot] = true;
4671 pcum->can_split = false;
4672 }
4673
4674 /* We didn't get a register, so this argument goes on the
4675 stack. */
4676 gcc_assert (pcum->can_split == false);
4677 return;
4678 }
4679 }
4680
4681 /* C3 - For double-word aligned arguments, round the NCRN up to the
4682 next even number. */
4683 ncrn = pcum->aapcs_ncrn;
4684 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4685 ncrn++;
4686
4687 nregs = ARM_NUM_REGS2(mode, type);
4688
4689 /* Sigh, this test should really assert that nregs > 0, but a GCC
4690 extension allows empty structs and then gives them empty size; it
4691 then allows such a structure to be passed by value. For some of
4692 the code below we have to pretend that such an argument has
4693 non-zero size so that we 'locate' it correctly either in
4694 registers or on the stack. */
4695 gcc_assert (nregs >= 0);
4696
4697 nregs2 = nregs ? nregs : 1;
4698
4699 /* C4 - Argument fits entirely in core registers. */
4700 if (ncrn + nregs2 <= NUM_ARG_REGS)
4701 {
4702 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4703 pcum->aapcs_next_ncrn = ncrn + nregs;
4704 return;
4705 }
4706
4707 /* C5 - Some core registers left and there are no arguments already
4708 on the stack: split this argument between the remaining core
4709 registers and the stack. */
4710 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4711 {
4712 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4713 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4714 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4715 return;
4716 }
4717
4718 /* C6 - NCRN is set to 4. */
4719 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4720
4721 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
4722 return;
4723 }
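/* A worked example of the core-register rules above (an illustrative
   sketch assuming a 32-bit word and no co-processor candidates), for a
   call f (int a, long long b, int c):
     a:  C4, NCRN 0 -> r0, NCRN becomes 1
     b:  C3 rounds NCRN up to 2 (doubleword alignment), C4 -> r2/r3,
         NCRN becomes 4
     c:  no core registers are left, so C6/C7 put it on the stack;
         r1 is left unused.  */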
4724
4725 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4726 for a call to a function whose data type is FNTYPE.
4727 For a library call, FNTYPE is NULL. */
4728 void
4729 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4730 rtx libname,
4731 tree fndecl ATTRIBUTE_UNUSED)
4732 {
4733 /* Long call handling. */
4734 if (fntype)
4735 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4736 else
4737 pcum->pcs_variant = arm_pcs_default;
4738
4739 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4740 {
4741 if (arm_libcall_uses_aapcs_base (libname))
4742 pcum->pcs_variant = ARM_PCS_AAPCS;
4743
4744 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4745 pcum->aapcs_reg = NULL_RTX;
4746 pcum->aapcs_partial = 0;
4747 pcum->aapcs_arg_processed = false;
4748 pcum->aapcs_cprc_slot = -1;
4749 pcum->can_split = true;
4750
4751 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4752 {
4753 int i;
4754
4755 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4756 {
4757 pcum->aapcs_cprc_failed[i] = false;
4758 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4759 }
4760 }
4761 return;
4762 }
4763
4764 /* Legacy ABIs */
4765
4766 /* On the ARM, the offset starts at 0. */
4767 pcum->nregs = 0;
4768 pcum->iwmmxt_nregs = 0;
4769 pcum->can_split = true;
4770
4771 /* Varargs vectors are treated the same as long long.
4772 named_count avoids having to change the way arm handles 'named' */
4773 pcum->named_count = 0;
4774 pcum->nargs = 0;
4775
4776 if (TARGET_REALLY_IWMMXT && fntype)
4777 {
4778 tree fn_arg;
4779
4780 for (fn_arg = TYPE_ARG_TYPES (fntype);
4781 fn_arg;
4782 fn_arg = TREE_CHAIN (fn_arg))
4783 pcum->named_count += 1;
4784
4785 if (! pcum->named_count)
4786 pcum->named_count = INT_MAX;
4787 }
4788 }
4789
4790
4791 /* Return true if mode/type need doubleword alignment. */
4792 static bool
4793 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4794 {
4795 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4796 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4797 }
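/* For example (illustrative): with a 32-bit PARM_BOUNDARY, an int or a
   pointer does not need doubleword alignment, while a long long or a
   type declared with __attribute__ ((aligned (8))) does.  */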
4798
4799
4800 /* Determine where to put an argument to a function.
4801 Value is zero to push the argument on the stack,
4802 or a hard register in which to store the argument.
4803
4804 MODE is the argument's machine mode.
4805 TYPE is the data type of the argument (as a tree).
4806 This is null for libcalls where that information may
4807 not be available.
4808 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4809 the preceding args and about the function being called.
4810 NAMED is nonzero if this argument is a named parameter
4811 (otherwise it is an extra parameter matching an ellipsis).
4812
4813 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4814 other arguments are passed on the stack. If (NAMED == 0) (which happens
4815 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4816 defined), say it is passed on the stack (function_prologue will
4817 indeed make it go on the stack if necessary). */
4818
4819 static rtx
4820 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4821 const_tree type, bool named)
4822 {
4823 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4824 int nregs;
4825
4826 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4827 a call insn (op3 of a call_value insn). */
4828 if (mode == VOIDmode)
4829 return const0_rtx;
4830
4831 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4832 {
4833 aapcs_layout_arg (pcum, mode, type, named);
4834 return pcum->aapcs_reg;
4835 }
4836
4837 /* Varargs vectors are treated the same as long long.
4838 named_count avoids having to change the way arm handles 'named' */
4839 if (TARGET_IWMMXT_ABI
4840 && arm_vector_mode_supported_p (mode)
4841 && pcum->named_count > pcum->nargs + 1)
4842 {
4843 if (pcum->iwmmxt_nregs <= 9)
4844 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4845 else
4846 {
4847 pcum->can_split = false;
4848 return NULL_RTX;
4849 }
4850 }
4851
4852 /* Put doubleword aligned quantities in even register pairs. */
4853 if (pcum->nregs & 1
4854 && ARM_DOUBLEWORD_ALIGN
4855 && arm_needs_doubleword_align (mode, type))
4856 pcum->nregs++;
4857
4858 /* Only allow splitting an arg between regs and memory if all preceding
4859 args were allocated to regs. For args passed by reference we only count
4860 the reference pointer. */
4861 if (pcum->can_split)
4862 nregs = 1;
4863 else
4864 nregs = ARM_NUM_REGS2 (mode, type);
4865
4866 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4867 return NULL_RTX;
4868
4869 return gen_rtx_REG (mode, pcum->nregs);
4870 }
4871
4872 static unsigned int
4873 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4874 {
4875 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4876 ? DOUBLEWORD_ALIGNMENT
4877 : PARM_BOUNDARY);
4878 }
4879
4880 static int
4881 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
4882 tree type, bool named)
4883 {
4884 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4885 int nregs = pcum->nregs;
4886
4887 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4888 {
4889 aapcs_layout_arg (pcum, mode, type, named);
4890 return pcum->aapcs_partial;
4891 }
4892
4893 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4894 return 0;
4895
4896 if (NUM_ARG_REGS > nregs
4897 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4898 && pcum->can_split)
4899 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4900
4901 return 0;
4902 }
4903
4904 /* Update the data in PCUM to advance over an argument
4905 of mode MODE and data type TYPE.
4906 (TYPE is null for libcalls where that information may not be available.) */
4907
4908 static void
4909 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
4910 const_tree type, bool named)
4911 {
4912 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4913
4914 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4915 {
4916 aapcs_layout_arg (pcum, mode, type, named);
4917
4918 if (pcum->aapcs_cprc_slot >= 0)
4919 {
4920 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4921 type);
4922 pcum->aapcs_cprc_slot = -1;
4923 }
4924
4925 /* Generic stuff. */
4926 pcum->aapcs_arg_processed = false;
4927 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4928 pcum->aapcs_reg = NULL_RTX;
4929 pcum->aapcs_partial = 0;
4930 }
4931 else
4932 {
4933 pcum->nargs += 1;
4934 if (arm_vector_mode_supported_p (mode)
4935 && pcum->named_count > pcum->nargs
4936 && TARGET_IWMMXT_ABI)
4937 pcum->iwmmxt_nregs += 1;
4938 else
4939 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4940 }
4941 }
4942
4943 /* Variable sized types are passed by reference. This is a GCC
4944 extension to the ARM ABI. */
4945
4946 static bool
4947 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
4948 enum machine_mode mode ATTRIBUTE_UNUSED,
4949 const_tree type, bool named ATTRIBUTE_UNUSED)
4950 {
4951 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4952 }
4953 \f
4954 /* Encode the current state of the #pragma [no_]long_calls. */
4955 typedef enum
4956 {
4957 OFF, /* No #pragma [no_]long_calls is in effect. */
4958 LONG, /* #pragma long_calls is in effect. */
4959 SHORT /* #pragma no_long_calls is in effect. */
4960 } arm_pragma_enum;
4961
4962 static arm_pragma_enum arm_pragma_long_calls = OFF;
4963
4964 void
4965 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4966 {
4967 arm_pragma_long_calls = LONG;
4968 }
4969
4970 void
4971 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4972 {
4973 arm_pragma_long_calls = SHORT;
4974 }
4975
4976 void
4977 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4978 {
4979 arm_pragma_long_calls = OFF;
4980 }
4981 \f
4982 /* Handle an attribute requiring a FUNCTION_DECL;
4983 arguments as in struct attribute_spec.handler. */
4984 static tree
4985 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4986 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4987 {
4988 if (TREE_CODE (*node) != FUNCTION_DECL)
4989 {
4990 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4991 name);
4992 *no_add_attrs = true;
4993 }
4994
4995 return NULL_TREE;
4996 }
4997
4998 /* Handle an "interrupt" or "isr" attribute;
4999 arguments as in struct attribute_spec.handler. */
5000 static tree
5001 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5002 bool *no_add_attrs)
5003 {
5004 if (DECL_P (*node))
5005 {
5006 if (TREE_CODE (*node) != FUNCTION_DECL)
5007 {
5008 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5009 name);
5010 *no_add_attrs = true;
5011 }
5012 /* FIXME: the argument if any is checked for type attributes;
5013 should it be checked for decl ones? */
5014 }
5015 else
5016 {
5017 if (TREE_CODE (*node) == FUNCTION_TYPE
5018 || TREE_CODE (*node) == METHOD_TYPE)
5019 {
5020 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5021 {
5022 warning (OPT_Wattributes, "%qE attribute ignored",
5023 name);
5024 *no_add_attrs = true;
5025 }
5026 }
5027 else if (TREE_CODE (*node) == POINTER_TYPE
5028 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5029 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5030 && arm_isr_value (args) != ARM_FT_UNKNOWN)
5031 {
5032 *node = build_variant_type_copy (*node);
5033 TREE_TYPE (*node) = build_type_attribute_variant
5034 (TREE_TYPE (*node),
5035 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5036 *no_add_attrs = true;
5037 }
5038 else
5039 {
5040 /* Possibly pass this attribute on from the type to a decl. */
5041 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5042 | (int) ATTR_FLAG_FUNCTION_NEXT
5043 | (int) ATTR_FLAG_ARRAY_NEXT))
5044 {
5045 *no_add_attrs = true;
5046 return tree_cons (name, args, NULL_TREE);
5047 }
5048 else
5049 {
5050 warning (OPT_Wattributes, "%qE attribute ignored",
5051 name);
5052 }
5053 }
5054 }
5055
5056 return NULL_TREE;
5057 }
5058
5059 /* Handle a "pcs" attribute; arguments as in struct
5060 attribute_spec.handler. */
5061 static tree
5062 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5063 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5064 {
5065 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5066 {
5067 warning (OPT_Wattributes, "%qE attribute ignored", name);
5068 *no_add_attrs = true;
5069 }
5070 return NULL_TREE;
5071 }
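/* Typical use of the attribute handled above (illustrative):

     void bar (double) __attribute__ ((pcs ("aapcs")));

   which selects the base AAPCS (core-register) convention for calls to
   bar even when the translation unit defaults to the VFP variant.  */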
5072
5073 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5074 /* Handle the "notshared" attribute. This attribute is another way of
5075 requesting hidden visibility. ARM's compiler supports
5076 "__declspec(notshared)"; we support the same thing via an
5077 attribute. */
5078
5079 static tree
5080 arm_handle_notshared_attribute (tree *node,
5081 tree name ATTRIBUTE_UNUSED,
5082 tree args ATTRIBUTE_UNUSED,
5083 int flags ATTRIBUTE_UNUSED,
5084 bool *no_add_attrs)
5085 {
5086 tree decl = TYPE_NAME (*node);
5087
5088 if (decl)
5089 {
5090 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5091 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5092 *no_add_attrs = false;
5093 }
5094 return NULL_TREE;
5095 }
5096 #endif
5097
5098 /* Return 0 if the attributes for two types are incompatible, 1 if they
5099 are compatible, and 2 if they are nearly compatible (which causes a
5100 warning to be generated). */
5101 static int
5102 arm_comp_type_attributes (const_tree type1, const_tree type2)
5103 {
5104 int l1, l2, s1, s2;
5105
5106 /* Check for mismatch of non-default calling convention. */
5107 if (TREE_CODE (type1) != FUNCTION_TYPE)
5108 return 1;
5109
5110 /* Check for mismatched call attributes. */
5111 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5112 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5113 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5114 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5115
5116 /* Only bother to check if an attribute is defined. */
5117 if (l1 | l2 | s1 | s2)
5118 {
5119 /* If one type has an attribute, the other must have the same attribute. */
5120 if ((l1 != l2) || (s1 != s2))
5121 return 0;
5122
5123 /* Disallow mixed attributes. */
5124 if ((l1 & s2) || (l2 & s1))
5125 return 0;
5126 }
5127
5128 /* Check for mismatched ISR attribute. */
5129 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5130 if (! l1)
5131 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5132 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5133 if (! l2)
5134 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5135 if (l1 != l2)
5136 return 0;
5137
5138 return 1;
5139 }
5140
5141 /* Assign default attributes to a newly defined type. This is used to
5142 set short_call/long_call attributes for function types of
5143 functions defined inside corresponding #pragma scopes. */
5144 static void
5145 arm_set_default_type_attributes (tree type)
5146 {
5147 /* Add __attribute__ ((long_call)) to all functions when inside
5148 #pragma long_calls, or __attribute__ ((short_call)) when inside
5149 #pragma no_long_calls. */
5150 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5151 {
5152 tree type_attr_list, attr_name;
5153 type_attr_list = TYPE_ATTRIBUTES (type);
5154
5155 if (arm_pragma_long_calls == LONG)
5156 attr_name = get_identifier ("long_call");
5157 else if (arm_pragma_long_calls == SHORT)
5158 attr_name = get_identifier ("short_call");
5159 else
5160 return;
5161
5162 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5163 TYPE_ATTRIBUTES (type) = type_attr_list;
5164 }
5165 }
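/* The user-visible effect of the pragma scopes handled above looks like
   this (illustrative; the declaration names are made up):

     #pragma long_calls
     void far_helper (void);      -- type gets "long_call"
     #pragma no_long_calls
     void near_helper (void);     -- type gets "short_call"
     #pragma long_calls_off
     void plain_helper (void);    -- neither attribute is added  */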
5166 \f
5167 /* Return true if DECL is known to be linked into section SECTION. */
5168
5169 static bool
5170 arm_function_in_section_p (tree decl, section *section)
5171 {
5172 /* We can only be certain about functions defined in the same
5173 compilation unit. */
5174 if (!TREE_STATIC (decl))
5175 return false;
5176
5177 /* Make sure that SYMBOL always binds to the definition in this
5178 compilation unit. */
5179 if (!targetm.binds_local_p (decl))
5180 return false;
5181
5182 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5183 if (!DECL_SECTION_NAME (decl))
5184 {
5185 /* Make sure that we will not create a unique section for DECL. */
5186 if (flag_function_sections || DECL_ONE_ONLY (decl))
5187 return false;
5188 }
5189
5190 return function_section (decl) == section;
5191 }
5192
5193 /* Return nonzero if a 32-bit "long_call" should be generated for
5194 a call from the current function to DECL. We generate a long_call
5195 if the function:
5196
5197 a. has an __attribute__ ((long_call))
5198 or b. is within the scope of a #pragma long_calls
5199 or c. the -mlong-calls command-line switch has been specified
5200
5201 However we do not generate a long call if the function:
5202
5203 d. has an __attribute__ ((short_call))
5204 or e. is inside the scope of a #pragma no_long_calls
5205 or f. is defined in the same section as the current function. */
5206
5207 bool
5208 arm_is_long_call_p (tree decl)
5209 {
5210 tree attrs;
5211
5212 if (!decl)
5213 return TARGET_LONG_CALLS;
5214
5215 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5216 if (lookup_attribute ("short_call", attrs))
5217 return false;
5218
5219 /* For "f", be conservative, and only cater for cases in which the
5220 whole of the current function is placed in the same section. */
5221 if (!flag_reorder_blocks_and_partition
5222 && TREE_CODE (decl) == FUNCTION_DECL
5223 && arm_function_in_section_p (decl, current_function_section ()))
5224 return false;
5225
5226 if (lookup_attribute ("long_call", attrs))
5227 return true;
5228
5229 return TARGET_LONG_CALLS;
5230 }
5231
5232 /* Return nonzero if it is ok to make a tail-call to DECL. */
5233 static bool
5234 arm_function_ok_for_sibcall (tree decl, tree exp)
5235 {
5236 unsigned long func_type;
5237
5238 if (cfun->machine->sibcall_blocked)
5239 return false;
5240
5241 /* Never tailcall something for which we have no decl, or if we
5242 are generating code for Thumb-1. */
5243 if (decl == NULL || TARGET_THUMB1)
5244 return false;
5245
5246 /* The PIC register is live on entry to VxWorks PLT entries, so we
5247 must make the call before restoring the PIC register. */
5248 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5249 return false;
5250
5251 /* Cannot tail-call to long calls, since these are out of range of
5252 a branch instruction. */
5253 if (arm_is_long_call_p (decl))
5254 return false;
5255
5256 /* If we are interworking and the function is not declared static
5257 then we can't tail-call it unless we know that it exists in this
5258 compilation unit (since it might be a Thumb routine). */
5259 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5260 return false;
5261
5262 func_type = arm_current_func_type ();
5263 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5264 if (IS_INTERRUPT (func_type))
5265 return false;
5266
5267 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5268 {
5269 /* Check that the return value locations are the same. For
5270 example that we aren't returning a value from the sibling in
5271 a VFP register but then need to transfer it to a core
5272 register. */
5273 rtx a, b;
5274
5275 a = arm_function_value (TREE_TYPE (exp), decl, false);
5276 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5277 cfun->decl, false);
5278 if (!rtx_equal_p (a, b))
5279 return false;
5280 }
5281
5282 /* Never tailcall if function may be called with a misaligned SP. */
5283 if (IS_STACKALIGN (func_type))
5284 return false;
5285
5286 /* The AAPCS says that, on bare-metal, calls to unresolved weak
5287 references should become a NOP. Don't convert such calls into
5288 sibling calls. */
5289 if (TARGET_AAPCS_BASED
5290 && arm_abi == ARM_ABI_AAPCS
5291 && DECL_WEAK (decl))
5292 return false;
5293
5294 /* Everything else is ok. */
5295 return true;
5296 }
5297
5298 \f
5299 /* Addressing mode support functions. */
5300
5301 /* Return nonzero if X is a legitimate immediate operand when compiling
5302 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5303 int
5304 legitimate_pic_operand_p (rtx x)
5305 {
5306 if (GET_CODE (x) == SYMBOL_REF
5307 || (GET_CODE (x) == CONST
5308 && GET_CODE (XEXP (x, 0)) == PLUS
5309 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5310 return 0;
5311
5312 return 1;
5313 }
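/* So, for example, (const_int 42) or a CONST_DOUBLE remain acceptable
   immediates when generating PIC, while (symbol_ref "foo") and
   (const (plus (symbol_ref "foo") (const_int 4))) are rejected here and
   have to be loaded through the GOT by legitimize_pic_address below.  */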
5314
5315 /* Record that the current function needs a PIC register. Initialize
5316 cfun->machine->pic_reg if we have not already done so. */
5317
5318 static void
5319 require_pic_register (void)
5320 {
5321 /* A lot of the logic here is made obscure by the fact that this
5322 routine gets called as part of the rtx cost estimation process.
5323 We don't want those calls to affect any assumptions about the real
5324 function; and further, we can't call entry_of_function() until we
5325 start the real expansion process. */
5326 if (!crtl->uses_pic_offset_table)
5327 {
5328 gcc_assert (can_create_pseudo_p ());
5329 if (arm_pic_register != INVALID_REGNUM)
5330 {
5331 if (!cfun->machine->pic_reg)
5332 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5333
5334 /* Play games to avoid marking the function as needing pic
5335 if we are being called as part of the cost-estimation
5336 process. */
5337 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5338 crtl->uses_pic_offset_table = 1;
5339 }
5340 else
5341 {
5342 rtx seq, insn;
5343
5344 if (!cfun->machine->pic_reg)
5345 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5346
5347 /* Play games to avoid marking the function as needing pic
5348 if we are being called as part of the cost-estimation
5349 process. */
5350 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5351 {
5352 crtl->uses_pic_offset_table = 1;
5353 start_sequence ();
5354
5355 arm_load_pic_register (0UL);
5356
5357 seq = get_insns ();
5358 end_sequence ();
5359
5360 for (insn = seq; insn; insn = NEXT_INSN (insn))
5361 if (INSN_P (insn))
5362 INSN_LOCATOR (insn) = prologue_locator;
5363
5364 /* We can be called during expansion of PHI nodes, where
5365 we can't yet emit instructions directly in the final
5366 insn stream. Queue the insns on the entry edge, they will
5367 be committed after everything else is expanded. */
5368 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5369 }
5370 }
5371 }
5372 }
5373
5374 rtx
5375 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5376 {
5377 if (GET_CODE (orig) == SYMBOL_REF
5378 || GET_CODE (orig) == LABEL_REF)
5379 {
5380 rtx insn;
5381
5382 if (reg == 0)
5383 {
5384 gcc_assert (can_create_pseudo_p ());
5385 reg = gen_reg_rtx (Pmode);
5386 }
5387
5388 /* VxWorks does not impose a fixed gap between segments; the run-time
5389 gap can be different from the object-file gap. We therefore can't
5390 use GOTOFF unless we are absolutely sure that the symbol is in the
5391 same segment as the GOT. Unfortunately, the flexibility of linker
5392 scripts means that we can't be sure of that in general, so assume
5393 that GOTOFF is never valid on VxWorks. */
5394 if ((GET_CODE (orig) == LABEL_REF
5395 || (GET_CODE (orig) == SYMBOL_REF &&
5396 SYMBOL_REF_LOCAL_P (orig)))
5397 && NEED_GOT_RELOC
5398 && !TARGET_VXWORKS_RTP)
5399 insn = arm_pic_static_addr (orig, reg);
5400 else
5401 {
5402 rtx pat;
5403 rtx mem;
5404
5405 /* If this function doesn't have a pic register, create one now. */
5406 require_pic_register ();
5407
5408 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5409
5410 /* Make the MEM as close to a constant as possible. */
5411 mem = SET_SRC (pat);
5412 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5413 MEM_READONLY_P (mem) = 1;
5414 MEM_NOTRAP_P (mem) = 1;
5415
5416 insn = emit_insn (pat);
5417 }
5418
5419 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5420 by the loop optimizer. */
5421 set_unique_reg_note (insn, REG_EQUAL, orig);
5422
5423 return reg;
5424 }
5425 else if (GET_CODE (orig) == CONST)
5426 {
5427 rtx base, offset;
5428
5429 if (GET_CODE (XEXP (orig, 0)) == PLUS
5430 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5431 return orig;
5432
5433 /* Handle the case where we have: const (UNSPEC_TLS). */
5434 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5435 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5436 return orig;
5437
5438 /* Handle the case where we have:
5439 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5440 CONST_INT. */
5441 if (GET_CODE (XEXP (orig, 0)) == PLUS
5442 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5443 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5444 {
5445 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
5446 return orig;
5447 }
5448
5449 if (reg == 0)
5450 {
5451 gcc_assert (can_create_pseudo_p ());
5452 reg = gen_reg_rtx (Pmode);
5453 }
5454
5455 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5456
5457 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5458 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5459 base == reg ? 0 : reg);
5460
5461 if (CONST_INT_P (offset))
5462 {
5463 /* The base register doesn't really matter, we only want to
5464 test the index for the appropriate mode. */
5465 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5466 {
5467 gcc_assert (can_create_pseudo_p ());
5468 offset = force_reg (Pmode, offset);
5469 }
5470
5471 if (CONST_INT_P (offset))
5472 return plus_constant (Pmode, base, INTVAL (offset));
5473 }
5474
5475 if (GET_MODE_SIZE (mode) > 4
5476 && (GET_MODE_CLASS (mode) == MODE_INT
5477 || TARGET_SOFT_FLOAT))
5478 {
5479 emit_insn (gen_addsi3 (reg, base, offset));
5480 return reg;
5481 }
5482
5483 return gen_rtx_PLUS (Pmode, base, offset);
5484 }
5485
5486 return orig;
5487 }
5488
5489
5490 /* Find a spare register to use during the prolog of a function. */
5491
5492 static int
5493 thumb_find_work_register (unsigned long pushed_regs_mask)
5494 {
5495 int reg;
5496
5497 /* Check the argument registers first as these are call-used. The
5498 register allocation order means that sometimes r3 might be used
5499 but earlier argument registers might not, so check them all. */
5500 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5501 if (!df_regs_ever_live_p (reg))
5502 return reg;
5503
5504 /* Before going on to check the call-saved registers we can try a couple
5505 more ways of deducing that r3 is available. The first is when we are
5506 pushing anonymous arguments onto the stack and we have fewer than 4
5507 registers' worth of fixed arguments (*). In this case r3 will be part of
5508 the variable argument list and so we can be sure that it will be
5509 pushed right at the start of the function. Hence it will be available
5510 for the rest of the prologue.
5511 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5512 if (cfun->machine->uses_anonymous_args
5513 && crtl->args.pretend_args_size > 0)
5514 return LAST_ARG_REGNUM;
5515
5516 /* The other case is when we have fixed arguments but fewer than 4 registers'
5517 worth. In this case r3 might be used in the body of the function, but
5518 it is not being used to convey an argument into the function. In theory
5519 we could just check crtl->args.size to see how many bytes are
5520 being passed in argument registers, but it seems that it is unreliable.
5521 Sometimes it will have the value 0 when in fact arguments are being
5522 passed. (See testcase execute/20021111-1.c for an example). So we also
5523 check the args_info.nregs field as well. The problem with this field is
5524 that it makes no allowances for arguments that are passed to the
5525 function but which are not used. Hence we could miss an opportunity
5526 when a function has an unused argument in r3. But it is better to be
5527 safe than to be sorry. */
5528 if (! cfun->machine->uses_anonymous_args
5529 && crtl->args.size >= 0
5530 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5531 && crtl->args.info.nregs < 4)
5532 return LAST_ARG_REGNUM;
5533
5534 /* Otherwise look for a call-saved register that is going to be pushed. */
5535 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5536 if (pushed_regs_mask & (1 << reg))
5537 return reg;
5538
5539 if (TARGET_THUMB2)
5540 {
5541 /* Thumb-2 can use high regs. */
5542 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5543 if (pushed_regs_mask & (1 << reg))
5544 return reg;
5545 }
5546 /* Something went wrong - thumb_compute_save_reg_mask()
5547 should have arranged for a suitable register to be pushed. */
5548 gcc_unreachable ();
5549 }
5550
5551 static GTY(()) int pic_labelno;
5552
5553 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5554 low register. */
5555
5556 void
5557 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5558 {
5559 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5560
5561 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5562 return;
5563
5564 gcc_assert (flag_pic);
5565
5566 pic_reg = cfun->machine->pic_reg;
5567 if (TARGET_VXWORKS_RTP)
5568 {
5569 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5570 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5571 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5572
5573 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5574
5575 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5576 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5577 }
5578 else
5579 {
5580 /* We use an UNSPEC rather than a LABEL_REF because this label
5581 never appears in the code stream. */
5582
5583 labelno = GEN_INT (pic_labelno++);
5584 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5585 l1 = gen_rtx_CONST (VOIDmode, l1);
5586
5587 /* On the ARM the PC register contains 'dot + 8' at the time of the
5588 addition, on the Thumb it is 'dot + 4'. */
5589 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5590 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5591 UNSPEC_GOTSYM_OFF);
5592 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5593
5594 if (TARGET_32BIT)
5595 {
5596 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5597 }
5598 else /* TARGET_THUMB1 */
5599 {
5600 if (arm_pic_register != INVALID_REGNUM
5601 && REGNO (pic_reg) > LAST_LO_REGNUM)
5602 {
5603 /* We will have pushed the pic register, so we should always be
5604 able to find a work register. */
5605 pic_tmp = gen_rtx_REG (SImode,
5606 thumb_find_work_register (saved_regs));
5607 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5608 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5609 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5610 }
5611 else
5612 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5613 }
5614 }
5615
5616 /* Need to emit this whether or not we obey regdecls,
5617 since setjmp/longjmp can cause life info to screw up. */
5618 emit_use (pic_reg);
5619 }
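/* For reference, in ARM state the non-VxWorks path above typically expands
   to a sequence of roughly this shape (a sketch; the exact label names,
   register choice and whether movw/movt is used instead of a literal load
   depend on the target and on arm_pic_register):

       ldr     rPIC, .LCPn
   .LPICm:
       add     rPIC, pc, rPIC        @ pc reads as .LPICm + 8 here

   where .LCPn holds _GLOBAL_OFFSET_TABLE_ - (.LPICm + 8), so after the add
   rPIC points at the GOT.  In Thumb state the pipeline offset is 4 rather
   than 8, matching the TARGET_ARM ? 8 : 4 constant used above.  */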
5620
5621 /* Generate code to load the address of a static var when flag_pic is set. */
5622 static rtx
5623 arm_pic_static_addr (rtx orig, rtx reg)
5624 {
5625 rtx l1, labelno, offset_rtx, insn;
5626
5627 gcc_assert (flag_pic);
5628
5629 /* We use an UNSPEC rather than a LABEL_REF because this label
5630 never appears in the code stream. */
5631 labelno = GEN_INT (pic_labelno++);
5632 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5633 l1 = gen_rtx_CONST (VOIDmode, l1);
5634
5635 /* On the ARM the PC register contains 'dot + 8' at the time of the
5636 addition, on the Thumb it is 'dot + 4'. */
5637 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5638 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5639 UNSPEC_SYMBOL_OFFSET);
5640 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5641
5642 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
5643 return insn;
5644 }
5645
5646 /* Return nonzero if X is valid as an ARM state addressing register. */
5647 static int
5648 arm_address_register_rtx_p (rtx x, int strict_p)
5649 {
5650 int regno;
5651
5652 if (!REG_P (x))
5653 return 0;
5654
5655 regno = REGNO (x);
5656
5657 if (strict_p)
5658 return ARM_REGNO_OK_FOR_BASE_P (regno);
5659
5660 return (regno <= LAST_ARM_REGNUM
5661 || regno >= FIRST_PSEUDO_REGISTER
5662 || regno == FRAME_POINTER_REGNUM
5663 || regno == ARG_POINTER_REGNUM);
5664 }
5665
5666 /* Return TRUE if this rtx is the difference of a symbol and a label,
5667 and will reduce to a PC-relative relocation in the object file.
5668 Expressions like this can be left alone when generating PIC, rather
5669 than forced through the GOT. */
5670 static int
5671 pcrel_constant_p (rtx x)
5672 {
5673 if (GET_CODE (x) == MINUS)
5674 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5675
5676 return FALSE;
5677 }
5678
5679 /* Return true if X will surely end up in an index register after next
5680 splitting pass. */
5681 static bool
5682 will_be_in_index_register (const_rtx x)
5683 {
5684 /* arm.md: calculate_pic_address will split this into a register. */
5685 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
5686 }
5687
5688 /* Return nonzero if X is a valid ARM state address operand. */
5689 int
5690 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5691 int strict_p)
5692 {
5693 bool use_ldrd;
5694 enum rtx_code code = GET_CODE (x);
5695
5696 if (arm_address_register_rtx_p (x, strict_p))
5697 return 1;
5698
5699 use_ldrd = (TARGET_LDRD
5700 && (mode == DImode
5701 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5702
5703 if (code == POST_INC || code == PRE_DEC
5704 || ((code == PRE_INC || code == POST_DEC)
5705 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5706 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5707
5708 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5709 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5710 && GET_CODE (XEXP (x, 1)) == PLUS
5711 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5712 {
5713 rtx addend = XEXP (XEXP (x, 1), 1);
5714
5715 /* Don't allow ldrd post increment by register because it's hard
5716 to fixup invalid register choices. */
5717 if (use_ldrd
5718 && GET_CODE (x) == POST_MODIFY
5719 && REG_P (addend))
5720 return 0;
5721
5722 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5723 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5724 }
5725
5726 /* After reload constants split into minipools will have addresses
5727 from a LABEL_REF. */
5728 else if (reload_completed
5729 && (code == LABEL_REF
5730 || (code == CONST
5731 && GET_CODE (XEXP (x, 0)) == PLUS
5732 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5733 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
5734 return 1;
5735
5736 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5737 return 0;
5738
5739 else if (code == PLUS)
5740 {
5741 rtx xop0 = XEXP (x, 0);
5742 rtx xop1 = XEXP (x, 1);
5743
5744 return ((arm_address_register_rtx_p (xop0, strict_p)
5745 && ((CONST_INT_P (xop1)
5746 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5747 || (!strict_p && will_be_in_index_register (xop1))))
5748 || (arm_address_register_rtx_p (xop1, strict_p)
5749 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5750 }
5751
5752 #if 0
5753 /* Reload currently can't handle MINUS, so disable this for now */
5754 else if (GET_CODE (x) == MINUS)
5755 {
5756 rtx xop0 = XEXP (x, 0);
5757 rtx xop1 = XEXP (x, 1);
5758
5759 return (arm_address_register_rtx_p (xop0, strict_p)
5760 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5761 }
5762 #endif
5763
5764 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5765 && code == SYMBOL_REF
5766 && CONSTANT_POOL_ADDRESS_P (x)
5767 && ! (flag_pic
5768 && symbol_mentioned_p (get_pool_constant (x))
5769 && ! pcrel_constant_p (get_pool_constant (x))))
5770 return 1;
5771
5772 return 0;
5773 }
5774
5775 /* Return nonzero if X is a valid Thumb-2 address operand. */
5776 static int
5777 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5778 {
5779 bool use_ldrd;
5780 enum rtx_code code = GET_CODE (x);
5781
5782 if (arm_address_register_rtx_p (x, strict_p))
5783 return 1;
5784
5785 use_ldrd = (TARGET_LDRD
5786 && (mode == DImode
5787 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5788
5789 if (code == POST_INC || code == PRE_DEC
5790 || ((code == PRE_INC || code == POST_DEC)
5791 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5792 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5793
5794 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5795 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5796 && GET_CODE (XEXP (x, 1)) == PLUS
5797 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5798 {
5799 /* Thumb-2 only has autoincrement by constant. */
5800 rtx addend = XEXP (XEXP (x, 1), 1);
5801 HOST_WIDE_INT offset;
5802
5803 if (!CONST_INT_P (addend))
5804 return 0;
5805
5806 offset = INTVAL(addend);
5807 if (GET_MODE_SIZE (mode) <= 4)
5808 return (offset > -256 && offset < 256);
5809
5810 return (use_ldrd && offset > -1024 && offset < 1024
5811 && (offset & 3) == 0);
5812 }
5813
5814 /* After reload constants split into minipools will have addresses
5815 from a LABEL_REF. */
5816 else if (reload_completed
5817 && (code == LABEL_REF
5818 || (code == CONST
5819 && GET_CODE (XEXP (x, 0)) == PLUS
5820 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5821 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
5822 return 1;
5823
5824 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5825 return 0;
5826
5827 else if (code == PLUS)
5828 {
5829 rtx xop0 = XEXP (x, 0);
5830 rtx xop1 = XEXP (x, 1);
5831
5832 return ((arm_address_register_rtx_p (xop0, strict_p)
5833 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5834 || (!strict_p && will_be_in_index_register (xop1))))
5835 || (arm_address_register_rtx_p (xop1, strict_p)
5836 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5837 }
5838
5839 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5840 && code == SYMBOL_REF
5841 && CONSTANT_POOL_ADDRESS_P (x)
5842 && ! (flag_pic
5843 && symbol_mentioned_p (get_pool_constant (x))
5844 && ! pcrel_constant_p (get_pool_constant (x))))
5845 return 1;
5846
5847 return 0;
5848 }
5849
5850 /* Return nonzero if INDEX is valid for an address index operand in
5851 ARM state. */
5852 static int
5853 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5854 int strict_p)
5855 {
5856 HOST_WIDE_INT range;
5857 enum rtx_code code = GET_CODE (index);
5858
5859 /* Standard coprocessor addressing modes. */
5860 if (TARGET_HARD_FLOAT
5861 && TARGET_VFP
5862 && (mode == SFmode || mode == DFmode))
5863 return (code == CONST_INT && INTVAL (index) < 1024
5864 && INTVAL (index) > -1024
5865 && (INTVAL (index) & 3) == 0);
5866
5867 /* For quad modes, we restrict the constant offset to be slightly less
5868 than what the instruction format permits. We do this because for
5869 quad mode moves, we will actually decompose them into two separate
5870 double-mode reads or writes. INDEX must therefore be a valid
5871 (double-mode) offset and so should INDEX+8. */
5872 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5873 return (code == CONST_INT
5874 && INTVAL (index) < 1016
5875 && INTVAL (index) > -1024
5876 && (INTVAL (index) & 3) == 0);
5877
5878 /* We have no such constraint on double mode offsets, so we permit the
5879 full range of the instruction format. */
5880 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5881 return (code == CONST_INT
5882 && INTVAL (index) < 1024
5883 && INTVAL (index) > -1024
5884 && (INTVAL (index) & 3) == 0);
5885
5886 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5887 return (code == CONST_INT
5888 && INTVAL (index) < 1024
5889 && INTVAL (index) > -1024
5890 && (INTVAL (index) & 3) == 0);
5891
5892 if (arm_address_register_rtx_p (index, strict_p)
5893 && (GET_MODE_SIZE (mode) <= 4))
5894 return 1;
5895
5896 if (mode == DImode || mode == DFmode)
5897 {
5898 if (code == CONST_INT)
5899 {
5900 HOST_WIDE_INT val = INTVAL (index);
5901
5902 if (TARGET_LDRD)
5903 return val > -256 && val < 256;
5904 else
5905 return val > -4096 && val < 4092;
5906 }
5907
5908 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5909 }
5910
5911 if (GET_MODE_SIZE (mode) <= 4
5912 && ! (arm_arch4
5913 && (mode == HImode
5914 || mode == HFmode
5915 || (mode == QImode && outer == SIGN_EXTEND))))
5916 {
5917 if (code == MULT)
5918 {
5919 rtx xiop0 = XEXP (index, 0);
5920 rtx xiop1 = XEXP (index, 1);
5921
5922 return ((arm_address_register_rtx_p (xiop0, strict_p)
5923 && power_of_two_operand (xiop1, SImode))
5924 || (arm_address_register_rtx_p (xiop1, strict_p)
5925 && power_of_two_operand (xiop0, SImode)));
5926 }
5927 else if (code == LSHIFTRT || code == ASHIFTRT
5928 || code == ASHIFT || code == ROTATERT)
5929 {
5930 rtx op = XEXP (index, 1);
5931
5932 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5933 && CONST_INT_P (op)
5934 && INTVAL (op) > 0
5935 && INTVAL (op) <= 31);
5936 }
5937 }
5938
5939 /* For ARM v4 we may be doing a sign-extend operation during the
5940 load. */
5941 if (arm_arch4)
5942 {
5943 if (mode == HImode
5944 || mode == HFmode
5945 || (outer == SIGN_EXTEND && mode == QImode))
5946 range = 256;
5947 else
5948 range = 4096;
5949 }
5950 else
5951 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5952
5953 return (code == CONST_INT
5954 && INTVAL (index) < range
5955 && INTVAL (index) > -range);
5956 }
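/* For a CONST_INT index, the ranges accepted above work out roughly to:

     VFP SFmode/DFmode                -1020 .. +1020, multiple of 4
     NEON quad-reg modes              -1020 .. +1012, multiple of 4
     NEON double-reg / iWMMXt modes   -1020 .. +1020, multiple of 4
     DImode/DFmode with ldrd           -255 .. +255
     DImode/DFmode without ldrd       -4095 .. +4091
     HImode/HFmode on ARMv4 or later   -255 .. +255
     SImode/QImode                    -4095 .. +4095
     HImode/HFmode before ARMv4       -4094 .. +4094  */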
5957
5958 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5959 index operand, i.e. 1, 2, 4 or 8. */
5960 static bool
5961 thumb2_index_mul_operand (rtx op)
5962 {
5963 HOST_WIDE_INT val;
5964
5965 if (!CONST_INT_P (op))
5966 return false;
5967
5968 val = INTVAL(op);
5969 return (val == 1 || val == 2 || val == 4 || val == 8);
5970 }
5971
5972 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5973 static int
5974 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5975 {
5976 enum rtx_code code = GET_CODE (index);
5977
5978 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5979 /* Standard coprocessor addressing modes. */
5980 if (TARGET_HARD_FLOAT
5981 && TARGET_VFP
5982 && (mode == SFmode || mode == DFmode))
5983 return (code == CONST_INT && INTVAL (index) < 1024
5984 /* Thumb-2 allows only a > -256 index range for its core register
5985 load/stores. Since we allow SF/DF in core registers, we have
5986 to use the intersection between -256~4096 (core) and -1024~1024
5987 (coprocessor). */
5988 && INTVAL (index) > -256
5989 && (INTVAL (index) & 3) == 0);
5990
5991 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5992 {
5993 /* For DImode assume values will usually live in core regs
5994 and only allow LDRD addressing modes. */
5995 if (!TARGET_LDRD || mode != DImode)
5996 return (code == CONST_INT
5997 && INTVAL (index) < 1024
5998 && INTVAL (index) > -1024
5999 && (INTVAL (index) & 3) == 0);
6000 }
6001
6002 /* For quad modes, we restrict the constant offset to be slightly less
6003 than what the instruction format permits. We do this because for
6004 quad mode moves, we will actually decompose them into two separate
6005 double-mode reads or writes. INDEX must therefore be a valid
6006 (double-mode) offset and so should INDEX+8. */
6007 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6008 return (code == CONST_INT
6009 && INTVAL (index) < 1016
6010 && INTVAL (index) > -1024
6011 && (INTVAL (index) & 3) == 0);
6012
6013 /* We have no such constraint on double mode offsets, so we permit the
6014 full range of the instruction format. */
6015 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6016 return (code == CONST_INT
6017 && INTVAL (index) < 1024
6018 && INTVAL (index) > -1024
6019 && (INTVAL (index) & 3) == 0);
6020
6021 if (arm_address_register_rtx_p (index, strict_p)
6022 && (GET_MODE_SIZE (mode) <= 4))
6023 return 1;
6024
6025 if (mode == DImode || mode == DFmode)
6026 {
6027 if (code == CONST_INT)
6028 {
6029 HOST_WIDE_INT val = INTVAL (index);
6030 /* ??? Can we assume ldrd for thumb2? */
6031 /* Thumb-2 ldrd only has reg+const addressing modes. */
6032 /* ldrd supports offsets of +-1020.
6033 However the ldr fallback does not. */
6034 return val > -256 && val < 256 && (val & 3) == 0;
6035 }
6036 else
6037 return 0;
6038 }
6039
6040 if (code == MULT)
6041 {
6042 rtx xiop0 = XEXP (index, 0);
6043 rtx xiop1 = XEXP (index, 1);
6044
6045 return ((arm_address_register_rtx_p (xiop0, strict_p)
6046 && thumb2_index_mul_operand (xiop1))
6047 || (arm_address_register_rtx_p (xiop1, strict_p)
6048 && thumb2_index_mul_operand (xiop0)));
6049 }
6050 else if (code == ASHIFT)
6051 {
6052 rtx op = XEXP (index, 1);
6053
6054 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6055 && CONST_INT_P (op)
6056 && INTVAL (op) > 0
6057 && INTVAL (op) <= 3);
6058 }
6059
6060 return (code == CONST_INT
6061 && INTVAL (index) < 4096
6062 && INTVAL (index) > -256);
6063 }
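/* The corresponding Thumb-2 CONST_INT ranges accepted above are roughly:

     VFP SFmode/DFmode                 -252 .. +1020, multiple of 4
     iWMMXt regs (unless DImode+ldrd) -1020 .. +1020, multiple of 4
     NEON quad-reg modes              -1020 .. +1012, multiple of 4
     NEON double-reg modes            -1020 .. +1020, multiple of 4
     DImode/DFmode                     -252 .. +252, multiple of 4
     everything else                   -255 .. +4095  */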
6064
6065 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6066 static int
6067 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6068 {
6069 int regno;
6070
6071 if (!REG_P (x))
6072 return 0;
6073
6074 regno = REGNO (x);
6075
6076 if (strict_p)
6077 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6078
6079 return (regno <= LAST_LO_REGNUM
6080 || regno > LAST_VIRTUAL_REGISTER
6081 || regno == FRAME_POINTER_REGNUM
6082 || (GET_MODE_SIZE (mode) >= 4
6083 && (regno == STACK_POINTER_REGNUM
6084 || regno >= FIRST_PSEUDO_REGISTER
6085 || x == hard_frame_pointer_rtx
6086 || x == arg_pointer_rtx)));
6087 }
6088
6089 /* Return nonzero if x is a legitimate index register. This is the case
6090 for any base register that can access a QImode object. */
6091 inline static int
6092 thumb1_index_register_rtx_p (rtx x, int strict_p)
6093 {
6094 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6095 }
6096
6097 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6098
6099 The AP may be eliminated to either the SP or the FP, so we use the
6100 least common denominator, e.g. SImode, and offsets from 0 to 64.
6101
6102 ??? Verify whether the above is the right approach.
6103
6104 ??? Also, the FP may be eliminated to the SP, so perhaps that
6105 needs special handling also.
6106
6107 ??? Look at how the mips16 port solves this problem. It probably uses
6108 better ways to solve some of these problems.
6109
6110 Although it is not incorrect, we don't accept QImode and HImode
6111 addresses based on the frame pointer or arg pointer until the
6112 reload pass starts. This is so that eliminating such addresses
6113 into stack based ones won't produce impossible code. */
6114 int
6115 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6116 {
6117 /* ??? Not clear if this is right. Experiment. */
6118 if (GET_MODE_SIZE (mode) < 4
6119 && !(reload_in_progress || reload_completed)
6120 && (reg_mentioned_p (frame_pointer_rtx, x)
6121 || reg_mentioned_p (arg_pointer_rtx, x)
6122 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6123 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6124 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6125 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6126 return 0;
6127
6128 /* Accept any base register. SP only in SImode or larger. */
6129 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6130 return 1;
6131
6132 /* This is PC relative data before arm_reorg runs. */
6133 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6134 && GET_CODE (x) == SYMBOL_REF
6135 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6136 return 1;
6137
6138 /* This is PC relative data after arm_reorg runs. */
6139 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6140 && reload_completed
6141 && (GET_CODE (x) == LABEL_REF
6142 || (GET_CODE (x) == CONST
6143 && GET_CODE (XEXP (x, 0)) == PLUS
6144 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6145 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6146 return 1;
6147
6148 /* Post-inc indexing only supported for SImode and larger. */
6149 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6150 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6151 return 1;
6152
6153 else if (GET_CODE (x) == PLUS)
6154 {
6155 /* REG+REG address can be any two index registers. */
6156 /* We disallow FRAME+REG addressing since we know that FRAME
6157 will be replaced with STACK, and SP relative addressing only
6158 permits SP+OFFSET. */
6159 if (GET_MODE_SIZE (mode) <= 4
6160 && XEXP (x, 0) != frame_pointer_rtx
6161 && XEXP (x, 1) != frame_pointer_rtx
6162 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6163 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6164 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6165 return 1;
6166
6167 /* REG+const has 5-7 bit offset for non-SP registers. */
6168 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6169 || XEXP (x, 0) == arg_pointer_rtx)
6170 && CONST_INT_P (XEXP (x, 1))
6171 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6172 return 1;
6173
6174 /* REG+const has 10-bit offset for SP, but only SImode and
6175 larger is supported. */
6176 /* ??? Should probably check for DI/DFmode overflow here
6177 just like GO_IF_LEGITIMATE_OFFSET does. */
6178 else if (REG_P (XEXP (x, 0))
6179 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6180 && GET_MODE_SIZE (mode) >= 4
6181 && CONST_INT_P (XEXP (x, 1))
6182 && INTVAL (XEXP (x, 1)) >= 0
6183 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6184 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6185 return 1;
6186
6187 else if (REG_P (XEXP (x, 0))
6188 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6189 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6190 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6191 && REGNO (XEXP (x, 0))
6192 <= LAST_VIRTUAL_POINTER_REGISTER))
6193 && GET_MODE_SIZE (mode) >= 4
6194 && CONST_INT_P (XEXP (x, 1))
6195 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6196 return 1;
6197 }
6198
6199 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6200 && GET_MODE_SIZE (mode) == 4
6201 && GET_CODE (x) == SYMBOL_REF
6202 && CONSTANT_POOL_ADDRESS_P (x)
6203 && ! (flag_pic
6204 && symbol_mentioned_p (get_pool_constant (x))
6205 && ! pcrel_constant_p (get_pool_constant (x))))
6206 return 1;
6207
6208 return 0;
6209 }
6210
6211 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6212 instruction of mode MODE. */
6213 int
6214 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6215 {
6216 switch (GET_MODE_SIZE (mode))
6217 {
6218 case 1:
6219 return val >= 0 && val < 32;
6220
6221 case 2:
6222 return val >= 0 && val < 64 && (val & 1) == 0;
6223
6224 default:
6225 return (val >= 0
6226 && (val + GET_MODE_SIZE (mode)) <= 128
6227 && (val & 3) == 0);
6228 }
6229 }
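/* For example, the valid offsets under this rule are 0..31 for QImode, the
   even values 0..62 for HImode, and the multiples of 4 from 0 to 124 for
   SImode; larger modes must additionally fit entirely below offset 128.  */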
6230
6231 bool
6232 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6233 {
6234 if (TARGET_ARM)
6235 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6236 else if (TARGET_THUMB2)
6237 return thumb2_legitimate_address_p (mode, x, strict_p);
6238 else /* if (TARGET_THUMB1) */
6239 return thumb1_legitimate_address_p (mode, x, strict_p);
6240 }
6241
6242 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
6243
6244 Given an rtx X being reloaded into a reg required to be
6245 in class CLASS, return the class of reg to actually use.
6246 In general this is just CLASS, but for the Thumb core registers and
6247 immediate constants we prefer a LO_REGS class or a subset. */
6248
6249 static reg_class_t
6250 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
6251 {
6252 if (TARGET_32BIT)
6253 return rclass;
6254 else
6255 {
6256 if (rclass == GENERAL_REGS
6257 || rclass == HI_REGS
6258 || rclass == NO_REGS
6259 || rclass == STACK_REG)
6260 return LO_REGS;
6261 else
6262 return rclass;
6263 }
6264 }
6265
6266 /* Build the SYMBOL_REF for __tls_get_addr. */
6267
6268 static GTY(()) rtx tls_get_addr_libfunc;
6269
6270 static rtx
6271 get_tls_get_addr (void)
6272 {
6273 if (!tls_get_addr_libfunc)
6274 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6275 return tls_get_addr_libfunc;
6276 }
6277
6278 static rtx
6279 arm_load_tp (rtx target)
6280 {
6281 if (!target)
6282 target = gen_reg_rtx (SImode);
6283
6284 if (TARGET_HARD_TP)
6285 {
6286 /* Can return in any reg. */
6287 emit_insn (gen_load_tp_hard (target));
6288 }
6289 else
6290 {
6291 /* Always returned in r0. Immediately copy the result into a pseudo,
6292 otherwise other uses of r0 (e.g. setting up function arguments) may
6293 clobber the value. */
6294
6295 rtx tmp;
6296
6297 emit_insn (gen_load_tp_soft ());
6298
6299 tmp = gen_rtx_REG (SImode, 0);
6300 emit_move_insn (target, tmp);
6301 }
6302 return target;
6303 }
6304
6305 static rtx
6306 load_tls_operand (rtx x, rtx reg)
6307 {
6308 rtx tmp;
6309
6310 if (reg == NULL_RTX)
6311 reg = gen_reg_rtx (SImode);
6312
6313 tmp = gen_rtx_CONST (SImode, x);
6314
6315 emit_move_insn (reg, tmp);
6316
6317 return reg;
6318 }
6319
6320 static rtx
6321 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6322 {
6323 rtx insns, label, labelno, sum;
6324
6325 gcc_assert (reloc != TLS_DESCSEQ);
6326 start_sequence ();
6327
6328 labelno = GEN_INT (pic_labelno++);
6329 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6330 label = gen_rtx_CONST (VOIDmode, label);
6331
6332 sum = gen_rtx_UNSPEC (Pmode,
6333 gen_rtvec (4, x, GEN_INT (reloc), label,
6334 GEN_INT (TARGET_ARM ? 8 : 4)),
6335 UNSPEC_TLS);
6336 reg = load_tls_operand (sum, reg);
6337
6338 if (TARGET_ARM)
6339 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6340 else
6341 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6342
6343 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6344 LCT_PURE, /* LCT_CONST? */
6345 Pmode, 1, reg, Pmode);
6346
6347 insns = get_insns ();
6348 end_sequence ();
6349
6350 return insns;
6351 }
6352
6353 static rtx
6354 arm_tls_descseq_addr (rtx x, rtx reg)
6355 {
6356 rtx labelno = GEN_INT (pic_labelno++);
6357 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6358 rtx sum = gen_rtx_UNSPEC (Pmode,
6359 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6360 gen_rtx_CONST (VOIDmode, label),
6361 GEN_INT (!TARGET_ARM)),
6362 UNSPEC_TLS);
6363 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6364
6365 emit_insn (gen_tlscall (x, labelno));
6366 if (!reg)
6367 reg = gen_reg_rtx (SImode);
6368 else
6369 gcc_assert (REGNO (reg) != 0);
6370
6371 emit_move_insn (reg, reg0);
6372
6373 return reg;
6374 }
6375
6376 rtx
6377 legitimize_tls_address (rtx x, rtx reg)
6378 {
6379 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6380 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6381
6382 switch (model)
6383 {
6384 case TLS_MODEL_GLOBAL_DYNAMIC:
6385 if (TARGET_GNU2_TLS)
6386 {
6387 reg = arm_tls_descseq_addr (x, reg);
6388
6389 tp = arm_load_tp (NULL_RTX);
6390
6391 dest = gen_rtx_PLUS (Pmode, tp, reg);
6392 }
6393 else
6394 {
6395 /* Original scheme */
6396 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6397 dest = gen_reg_rtx (Pmode);
6398 emit_libcall_block (insns, dest, ret, x);
6399 }
6400 return dest;
6401
6402 case TLS_MODEL_LOCAL_DYNAMIC:
6403 if (TARGET_GNU2_TLS)
6404 {
6405 reg = arm_tls_descseq_addr (x, reg);
6406
6407 tp = arm_load_tp (NULL_RTX);
6408
6409 dest = gen_rtx_PLUS (Pmode, tp, reg);
6410 }
6411 else
6412 {
6413 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6414
6415 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6416 share the LDM result with other LD model accesses. */
6417 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6418 UNSPEC_TLS);
6419 dest = gen_reg_rtx (Pmode);
6420 emit_libcall_block (insns, dest, ret, eqv);
6421
6422 /* Load the addend. */
6423 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
6424 GEN_INT (TLS_LDO32)),
6425 UNSPEC_TLS);
6426 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6427 dest = gen_rtx_PLUS (Pmode, dest, addend);
6428 }
6429 return dest;
6430
6431 case TLS_MODEL_INITIAL_EXEC:
6432 labelno = GEN_INT (pic_labelno++);
6433 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6434 label = gen_rtx_CONST (VOIDmode, label);
6435 sum = gen_rtx_UNSPEC (Pmode,
6436 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6437 GEN_INT (TARGET_ARM ? 8 : 4)),
6438 UNSPEC_TLS);
6439 reg = load_tls_operand (sum, reg);
6440
6441 if (TARGET_ARM)
6442 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6443 else if (TARGET_THUMB2)
6444 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6445 else
6446 {
6447 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6448 emit_move_insn (reg, gen_const_mem (SImode, reg));
6449 }
6450
6451 tp = arm_load_tp (NULL_RTX);
6452
6453 return gen_rtx_PLUS (Pmode, tp, reg);
6454
6455 case TLS_MODEL_LOCAL_EXEC:
6456 tp = arm_load_tp (NULL_RTX);
6457
6458 reg = gen_rtx_UNSPEC (Pmode,
6459 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6460 UNSPEC_TLS);
6461 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6462
6463 return gen_rtx_PLUS (Pmode, tp, reg);
6464
6465 default:
6466 abort ();
6467 }
6468 }
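/* As a rough guide (not tied to any particular configuration): a hypothetical
   "__thread int counter;" referenced from -fPIC code normally takes the
   global-dynamic path above (TLS_GD32, or the descriptor sequence with
   -mtls-dialect=gnu2), whereas the same variable in a non-PIC executable can
   typically use the local-exec form, where a TLS_LE32 offset is simply added
   to the thread pointer returned by arm_load_tp.  */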
6469
6470 /* Try machine-dependent ways of modifying an illegitimate address
6471 to be legitimate. If we find one, return the new, valid address. */
6472 rtx
6473 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6474 {
6475 if (!TARGET_ARM)
6476 {
6477 /* TODO: legitimize_address for Thumb2. */
6478 if (TARGET_THUMB2)
6479 return x;
6480 return thumb_legitimize_address (x, orig_x, mode);
6481 }
6482
6483 if (arm_tls_symbol_p (x))
6484 return legitimize_tls_address (x, NULL_RTX);
6485
6486 if (GET_CODE (x) == PLUS)
6487 {
6488 rtx xop0 = XEXP (x, 0);
6489 rtx xop1 = XEXP (x, 1);
6490
6491 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6492 xop0 = force_reg (SImode, xop0);
6493
6494 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6495 xop1 = force_reg (SImode, xop1);
6496
6497 if (ARM_BASE_REGISTER_RTX_P (xop0)
6498 && CONST_INT_P (xop1))
6499 {
6500 HOST_WIDE_INT n, low_n;
6501 rtx base_reg, val;
6502 n = INTVAL (xop1);
6503
6504 /* VFP addressing modes actually allow greater offsets, but for
6505 now we just stick with the lowest common denominator. */
6506 if (mode == DImode
6507 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6508 {
6509 low_n = n & 0x0f;
6510 n &= ~0x0f;
6511 if (low_n > 4)
6512 {
6513 n += 16;
6514 low_n -= 16;
6515 }
6516 }
6517 else
6518 {
6519 low_n = ((mode) == TImode ? 0
6520 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6521 n -= low_n;
6522 }
6523
6524 base_reg = gen_reg_rtx (SImode);
6525 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
6526 emit_move_insn (base_reg, val);
6527 x = plus_constant (Pmode, base_reg, low_n);
6528 }
6529 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6530 x = gen_rtx_PLUS (SImode, xop0, xop1);
6531 }
6532
6533 /* XXX We don't allow MINUS any more -- see comment in
6534 arm_legitimate_address_outer_p (). */
6535 else if (GET_CODE (x) == MINUS)
6536 {
6537 rtx xop0 = XEXP (x, 0);
6538 rtx xop1 = XEXP (x, 1);
6539
6540 if (CONSTANT_P (xop0))
6541 xop0 = force_reg (SImode, xop0);
6542
6543 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6544 xop1 = force_reg (SImode, xop1);
6545
6546 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6547 x = gen_rtx_MINUS (SImode, xop0, xop1);
6548 }
6549
6550 /* Make sure to take full advantage of the pre-indexed addressing mode
6551 with absolute addresses, which often allows the base register to
6552 be factorized for multiple adjacent memory references, and it might
6553 even allow the minipool to be avoided entirely. */
6554 else if (CONST_INT_P (x) && optimize > 0)
6555 {
6556 unsigned int bits;
6557 HOST_WIDE_INT mask, base, index;
6558 rtx base_reg;
6559
6560 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6561 use an 8-bit index. So let's use a 12-bit index for SImode only and
6562 hope that arm_gen_constant will enable ldrb to use more bits. */
6563 bits = (mode == SImode) ? 12 : 8;
6564 mask = (1 << bits) - 1;
6565 base = INTVAL (x) & ~mask;
6566 index = INTVAL (x) & mask;
6567 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6568 {
6569 /* It'll most probably be more efficient to generate the base
6570 with more bits set and use a negative index instead. */
6571 base |= mask;
6572 index -= mask;
6573 }
6574 base_reg = force_reg (SImode, GEN_INT (base));
6575 x = plus_constant (Pmode, base_reg, index);
6576 }
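/* A worked example of the split above (illustrative numbers only): loading a
   word from absolute address 4100 gives bits = 12, mask = 0xfff, base = 4096
   and index = 4.  bit_count (4096) is 1, which does not exceed (32 - 12)/2,
   so the base stays at 4096 and we effectively get "ldr rD, [base_reg, #4]"
   with base_reg holding 4096, letting a neighbouring access such as address
   4104 reuse the same base register.  */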
6577
6578 if (flag_pic)
6579 {
6580 /* We need to find and carefully transform any SYMBOL and LABEL
6581 references; so go back to the original address expression. */
6582 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6583
6584 if (new_x != orig_x)
6585 x = new_x;
6586 }
6587
6588 return x;
6589 }
6590
6591
6592 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6593 to be legitimate. If we find one, return the new, valid address. */
6594 rtx
6595 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6596 {
6597 if (arm_tls_symbol_p (x))
6598 return legitimize_tls_address (x, NULL_RTX);
6599
6600 if (GET_CODE (x) == PLUS
6601 && CONST_INT_P (XEXP (x, 1))
6602 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6603 || INTVAL (XEXP (x, 1)) < 0))
6604 {
6605 rtx xop0 = XEXP (x, 0);
6606 rtx xop1 = XEXP (x, 1);
6607 HOST_WIDE_INT offset = INTVAL (xop1);
6608
6609 /* Try to fold the offset into a biasing of the base register and
6610 then offsetting that. Don't do this when optimizing for space
6611 since it can cause too many CSEs. */
6612 if (optimize_size && offset >= 0
6613 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6614 {
6615 HOST_WIDE_INT delta;
6616
6617 if (offset >= 256)
6618 delta = offset - (256 - GET_MODE_SIZE (mode));
6619 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6620 delta = 31 * GET_MODE_SIZE (mode);
6621 else
6622 delta = offset & (~31 * GET_MODE_SIZE (mode));
6623
6624 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
6625 NULL_RTX);
6626 x = plus_constant (Pmode, xop0, delta);
6627 }
6628 else if (offset < 0 && offset > -256)
6629 /* Small negative offsets are best done with a subtract before the
6630 dereference; forcing these into a register normally takes two
6631 instructions. */
6632 x = force_operand (x, NULL_RTX);
6633 else
6634 {
6635 /* For the remaining cases, force the constant into a register. */
6636 xop1 = force_reg (SImode, xop1);
6637 x = gen_rtx_PLUS (SImode, xop0, xop1);
6638 }
6639 }
6640 else if (GET_CODE (x) == PLUS
6641 && s_register_operand (XEXP (x, 1), SImode)
6642 && !s_register_operand (XEXP (x, 0), SImode))
6643 {
6644 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6645
6646 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6647 }
6648
6649 if (flag_pic)
6650 {
6651 /* We need to find and carefully transform any SYMBOL and LABEL
6652 references; so go back to the original address expression. */
6653 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6654
6655 if (new_x != orig_x)
6656 x = new_x;
6657 }
6658
6659 return x;
6660 }
6661
6662 bool
6663 arm_legitimize_reload_address (rtx *p,
6664 enum machine_mode mode,
6665 int opnum, int type,
6666 int ind_levels ATTRIBUTE_UNUSED)
6667 {
6668 /* We must recognize output that we have already generated ourselves. */
6669 if (GET_CODE (*p) == PLUS
6670 && GET_CODE (XEXP (*p, 0)) == PLUS
6671 && REG_P (XEXP (XEXP (*p, 0), 0))
6672 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
6673 && CONST_INT_P (XEXP (*p, 1)))
6674 {
6675 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6676 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6677 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6678 return true;
6679 }
6680
6681 if (GET_CODE (*p) == PLUS
6682 && REG_P (XEXP (*p, 0))
6683 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6684 /* If the base register is equivalent to a constant, let the generic
6685 code handle it. Otherwise we will run into problems if a future
6686 reload pass decides to rematerialize the constant. */
6687 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
6688 && CONST_INT_P (XEXP (*p, 1)))
6689 {
6690 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6691 HOST_WIDE_INT low, high;
6692
6693 /* Detect coprocessor load/stores. */
6694 bool coproc_p = ((TARGET_HARD_FLOAT
6695 && TARGET_VFP
6696 && (mode == SFmode || mode == DFmode))
6697 || (TARGET_REALLY_IWMMXT
6698 && VALID_IWMMXT_REG_MODE (mode))
6699 || (TARGET_NEON
6700 && (VALID_NEON_DREG_MODE (mode)
6701 || VALID_NEON_QREG_MODE (mode))));
6702
6703 /* For some conditions, bail out when the low two bits are nonzero (unaligned). */
6704 if ((val & 0x3) != 0
6705 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6706 && (coproc_p
6707 /* For DI, and DF under soft-float: */
6708 || ((mode == DImode || mode == DFmode)
6709 /* Without ldrd, we use stm/ldm, which does not
6710 fare well with unaligned bits. */
6711 && (! TARGET_LDRD
6712 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6713 || TARGET_THUMB2))))
6714 return false;
6715
6716 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6717 where the (reg+high) part gets turned into a reload add insn,
6718 we try to decompose the index into high/low values that can often
6719 also lead to better reload CSE.
6720 For example:
6721 ldr r0, [r2, #4100] // Offset too large
6722 ldr r1, [r2, #4104] // Offset too large
6723
6724 is best reloaded as:
6725 add t1, r2, #4096
6726 ldr r0, [t1, #4]
6727 add t2, r2, #4096
6728 ldr r1, [t2, #8]
6729
6730 which post-reload CSE can simplify in most cases to eliminate the
6731 second add instruction:
6732 add t1, r2, #4096
6733 ldr r0, [t1, #4]
6734 ldr r1, [t1, #8]
6735
6736 The idea here is that we want to split out the bits of the constant
6737 as a mask, rather than as subtracting the maximum offset that the
6738 respective type of load/store used can handle.
6739
6740 A negative low part can still be worth using even if
6741 the overall offset is positive; sometimes this may lead to an immediate
6742 that can be constructed with fewer instructions.
6743 For example:
6744 ldr r0, [r2, #0x3FFFFC]
6745
6746 This is best reloaded as:
6747 add t1, r2, #0x400000
6748 ldr r0, [t1, #-4]
6749
6750 The trick for spotting this for a load insn with N bits of offset
6751 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6752 negative offset that is going to make bit N and all the bits below
6753 it become zero in the remainder part.
6754
6755 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6756 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6757 used in most cases of ARM load/store instructions. */
6758
6759 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6760 (((VAL) & ((1 << (N)) - 1)) \
6761 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6762 : 0)
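/* Worked values for the macro above with N = 12 (the ldr/str word case):
     SIGN_MAG_LOW_ADDR_BITS (0x0ffc, 12) == 4092    bit 12 clear: keep the
                                                     positive low part
     SIGN_MAG_LOW_ADDR_BITS (0x1004, 12) == -4092   bit 12 set: go negative so
                                                     high = 0x2000 clears bits
                                                     12 and below
     SIGN_MAG_LOW_ADDR_BITS (0x3ffffc, 12) == -4    the 0x3FFFFC example above,
                                                     giving high = 0x400000
   In each case high = val - low is what gets reloaded into the base reg.  */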
6763
6764 if (coproc_p)
6765 {
6766 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6767
6768 /* NEON quad-word load/stores are made of two double-word accesses,
6769 so the valid index range is reduced by 8. Treat as 9-bit range if
6770 we go over it. */
6771 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6772 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6773 }
6774 else if (GET_MODE_SIZE (mode) == 8)
6775 {
6776 if (TARGET_LDRD)
6777 low = (TARGET_THUMB2
6778 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6779 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6780 else
6781 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6782 to access doublewords. The supported load/store offsets are
6783 -8, -4, and 4, which we try to produce here. */
6784 low = ((val & 0xf) ^ 0x8) - 0x8;
6785 }
6786 else if (GET_MODE_SIZE (mode) < 8)
6787 {
6788 /* NEON element load/stores do not have an offset. */
6789 if (TARGET_NEON_FP16 && mode == HFmode)
6790 return false;
6791
6792 if (TARGET_THUMB2)
6793 {
6794 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6795 Try the wider 12-bit range first, and re-try if the result
6796 is out of range. */
6797 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6798 if (low < -255)
6799 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6800 }
6801 else
6802 {
6803 if (mode == HImode || mode == HFmode)
6804 {
6805 if (arm_arch4)
6806 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6807 else
6808 {
6809 /* The storehi/movhi_bytes fallbacks can use only
6810 [-4094,+4094] of the full ldrb/strb index range. */
6811 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6812 if (low == 4095 || low == -4095)
6813 return false;
6814 }
6815 }
6816 else
6817 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6818 }
6819 }
6820 else
6821 return false;
6822
6823 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6824 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6825 - (unsigned HOST_WIDE_INT) 0x80000000);
6826 /* Check for overflow or zero */
6827 if (low == 0 || high == 0 || (high + low != val))
6828 return false;
6829
6830 /* Reload the high part into a base reg; leave the low part
6831 in the mem. */
6832 *p = gen_rtx_PLUS (GET_MODE (*p),
6833 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6834 GEN_INT (high)),
6835 GEN_INT (low));
6836 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6837 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6838 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6839 return true;
6840 }
6841
6842 return false;
6843 }
6844
6845 rtx
6846 thumb_legitimize_reload_address (rtx *x_p,
6847 enum machine_mode mode,
6848 int opnum, int type,
6849 int ind_levels ATTRIBUTE_UNUSED)
6850 {
6851 rtx x = *x_p;
6852
6853 if (GET_CODE (x) == PLUS
6854 && GET_MODE_SIZE (mode) < 4
6855 && REG_P (XEXP (x, 0))
6856 && XEXP (x, 0) == stack_pointer_rtx
6857 && CONST_INT_P (XEXP (x, 1))
6858 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6859 {
6860 rtx orig_x = x;
6861
6862 x = copy_rtx (x);
6863 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6864 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6865 return x;
6866 }
6867
6868 /* If both registers are hi-regs, then it's better to reload the
6869 entire expression rather than each register individually. That
6870 only requires one reload register rather than two. */
6871 if (GET_CODE (x) == PLUS
6872 && REG_P (XEXP (x, 0))
6873 && REG_P (XEXP (x, 1))
6874 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6875 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6876 {
6877 rtx orig_x = x;
6878
6879 x = copy_rtx (x);
6880 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6881 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6882 return x;
6883 }
6884
6885 return NULL;
6886 }
6887
6888 /* Test for various thread-local symbols. */
6889
6890 /* Return TRUE if X is a thread-local symbol. */
6891
6892 static bool
6893 arm_tls_symbol_p (rtx x)
6894 {
6895 if (! TARGET_HAVE_TLS)
6896 return false;
6897
6898 if (GET_CODE (x) != SYMBOL_REF)
6899 return false;
6900
6901 return SYMBOL_REF_TLS_MODEL (x) != 0;
6902 }
6903
6904 /* Helper for arm_tls_referenced_p. */
6905
6906 static int
6907 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6908 {
6909 if (GET_CODE (*x) == SYMBOL_REF)
6910 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6911
6912 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6913 TLS offsets, not real symbol references. */
6914 if (GET_CODE (*x) == UNSPEC
6915 && XINT (*x, 1) == UNSPEC_TLS)
6916 return -1;
6917
6918 return 0;
6919 }
6920
6921 /* Return TRUE if X contains any TLS symbol references. */
6922
6923 bool
6924 arm_tls_referenced_p (rtx x)
6925 {
6926 if (! TARGET_HAVE_TLS)
6927 return false;
6928
6929 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6930 }
6931
6932 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6933
6934 On the ARM, allow any integer (invalid ones are removed later by insn
6935 patterns), nice doubles and symbol_refs which refer to the function's
6936 constant pool XXX.
6937
6938 When generating pic allow anything. */
6939
6940 static bool
6941 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
6942 {
6943 /* At present, we have no support for Neon structure constants, so forbid
6944 them here. It might be possible to handle simple cases like 0 and -1
6945 in future. */
6946 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
6947 return false;
6948
6949 return flag_pic || !label_mentioned_p (x);
6950 }
6951
6952 static bool
6953 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6954 {
6955 return (CONST_INT_P (x)
6956 || CONST_DOUBLE_P (x)
6957 || CONSTANT_ADDRESS_P (x)
6958 || flag_pic);
6959 }
6960
6961 static bool
6962 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
6963 {
6964 return (!arm_cannot_force_const_mem (mode, x)
6965 && (TARGET_32BIT
6966 ? arm_legitimate_constant_p_1 (mode, x)
6967 : thumb_legitimate_constant_p (mode, x)));
6968 }
6969
6970 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6971
6972 static bool
6973 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6974 {
6975 rtx base, offset;
6976
6977 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6978 {
6979 split_const (x, &base, &offset);
6980 if (GET_CODE (base) == SYMBOL_REF
6981 && !offset_within_block_p (base, INTVAL (offset)))
6982 return true;
6983 }
6984 return arm_tls_referenced_p (x);
6985 }
6986 \f
6987 #define REG_OR_SUBREG_REG(X) \
6988 (REG_P (X) \
6989 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
6990
6991 #define REG_OR_SUBREG_RTX(X) \
6992 (REG_P (X) ? (X) : SUBREG_REG (X))
6993
6994 static inline int
6995 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6996 {
6997 enum machine_mode mode = GET_MODE (x);
6998 int total;
6999
7000 switch (code)
7001 {
7002 case ASHIFT:
7003 case ASHIFTRT:
7004 case LSHIFTRT:
7005 case ROTATERT:
7006 case PLUS:
7007 case MINUS:
7008 case COMPARE:
7009 case NEG:
7010 case NOT:
7011 return COSTS_N_INSNS (1);
7012
7013 case MULT:
7014 if (CONST_INT_P (XEXP (x, 1)))
7015 {
7016 int cycles = 0;
7017 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7018
7019 while (i)
7020 {
7021 i >>= 2;
7022 cycles++;
7023 }
7024 return COSTS_N_INSNS (2) + cycles;
7025 }
7026 return COSTS_N_INSNS (1) + 16;
7027
7028 case SET:
7029 return (COSTS_N_INSNS (1)
7030 + 4 * ((MEM_P (SET_SRC (x)))
7031 + MEM_P (SET_DEST (x))));
7032
7033 case CONST_INT:
7034 if (outer == SET)
7035 {
7036 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7037 return 0;
7038 if (thumb_shiftable_const (INTVAL (x)))
7039 return COSTS_N_INSNS (2);
7040 return COSTS_N_INSNS (3);
7041 }
7042 else if ((outer == PLUS || outer == COMPARE)
7043 && INTVAL (x) < 256 && INTVAL (x) > -256)
7044 return 0;
7045 else if ((outer == IOR || outer == XOR || outer == AND)
7046 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7047 return COSTS_N_INSNS (1);
7048 else if (outer == AND)
7049 {
7050 int i;
7051 /* This duplicates the tests in the andsi3 expander. */
7052 for (i = 9; i <= 31; i++)
7053 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7054 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7055 return COSTS_N_INSNS (2);
7056 }
7057 else if (outer == ASHIFT || outer == ASHIFTRT
7058 || outer == LSHIFTRT)
7059 return 0;
7060 return COSTS_N_INSNS (2);
7061
7062 case CONST:
7063 case CONST_DOUBLE:
7064 case LABEL_REF:
7065 case SYMBOL_REF:
7066 return COSTS_N_INSNS (3);
7067
7068 case UDIV:
7069 case UMOD:
7070 case DIV:
7071 case MOD:
7072 return 100;
7073
7074 case TRUNCATE:
7075 return 99;
7076
7077 case AND:
7078 case XOR:
7079 case IOR:
7080 /* XXX guess. */
7081 return 8;
7082
7083 case MEM:
7084 /* XXX another guess. */
7085 /* Memory costs quite a lot for the first word, but subsequent words
7086 load at the equivalent of a single insn each. */
7087 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7088 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7089 ? 4 : 0));
7090
7091 case IF_THEN_ELSE:
7092 /* XXX a guess. */
7093 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7094 return 14;
7095 return 2;
7096
7097 case SIGN_EXTEND:
7098 case ZERO_EXTEND:
7099 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7100 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7101
7102 if (mode == SImode)
7103 return total;
7104
7105 if (arm_arch6)
7106 return total + COSTS_N_INSNS (1);
7107
7108 /* Assume a two-shift sequence. Increase the cost slightly so
7109 we prefer actual shifts over an extend operation. */
7110 return total + 1 + COSTS_N_INSNS (2);
7111
7112 default:
7113 return 99;
7114 }
7115 }
7116
7117 static inline bool
7118 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7119 {
7120 enum machine_mode mode = GET_MODE (x);
7121 enum rtx_code subcode;
7122 rtx operand;
7123 enum rtx_code code = GET_CODE (x);
7124 *total = 0;
7125
7126 switch (code)
7127 {
7128 case MEM:
7129 /* Memory costs quite a lot for the first word, but subsequent words
7130 load at the equivalent of a single insn each. */
7131 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7132 return true;
7133
7134 case DIV:
7135 case MOD:
7136 case UDIV:
7137 case UMOD:
7138 if (TARGET_HARD_FLOAT && mode == SFmode)
7139 *total = COSTS_N_INSNS (2);
7140 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7141 *total = COSTS_N_INSNS (4);
7142 else
7143 *total = COSTS_N_INSNS (20);
7144 return false;
7145
7146 case ROTATE:
7147 if (REG_P (XEXP (x, 1)))
7148 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
7149 else if (!CONST_INT_P (XEXP (x, 1)))
7150 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
7151
7152 /* Fall through */
7153 case ROTATERT:
7154 if (mode != SImode)
7155 {
7156 *total += COSTS_N_INSNS (4);
7157 return true;
7158 }
7159
7160 /* Fall through */
7161 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
7162 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7163 if (mode == DImode)
7164 {
7165 *total += COSTS_N_INSNS (3);
7166 return true;
7167 }
7168
7169 *total += COSTS_N_INSNS (1);
7170 /* Increase the cost of complex shifts because they aren't any faster,
7171 and reduce dual issue opportunities. */
7172 if (arm_tune_cortex_a9
7173 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
7174 ++*total;
7175
7176 return true;
7177
7178 case MINUS:
7179 if (mode == DImode)
7180 {
7181 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7182 if (CONST_INT_P (XEXP (x, 0))
7183 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7184 {
7185 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7186 return true;
7187 }
7188
7189 if (CONST_INT_P (XEXP (x, 1))
7190 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7191 {
7192 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7193 return true;
7194 }
7195
7196 return false;
7197 }
7198
7199 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7200 {
7201 if (TARGET_HARD_FLOAT
7202 && (mode == SFmode
7203 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7204 {
7205 *total = COSTS_N_INSNS (1);
7206 if (CONST_DOUBLE_P (XEXP (x, 0))
7207 && arm_const_double_rtx (XEXP (x, 0)))
7208 {
7209 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7210 return true;
7211 }
7212
7213 if (CONST_DOUBLE_P (XEXP (x, 1))
7214 && arm_const_double_rtx (XEXP (x, 1)))
7215 {
7216 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7217 return true;
7218 }
7219
7220 return false;
7221 }
7222 *total = COSTS_N_INSNS (20);
7223 return false;
7224 }
7225
7226 *total = COSTS_N_INSNS (1);
7227 if (CONST_INT_P (XEXP (x, 0))
7228 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7229 {
7230 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7231 return true;
7232 }
7233
7234 subcode = GET_CODE (XEXP (x, 1));
7235 if (subcode == ASHIFT || subcode == ASHIFTRT
7236 || subcode == LSHIFTRT
7237 || subcode == ROTATE || subcode == ROTATERT)
7238 {
7239 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7240 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7241 return true;
7242 }
7243
7244 /* A shift as a part of RSB costs no more than RSB itself. */
7245 if (GET_CODE (XEXP (x, 0)) == MULT
7246 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7247 {
7248 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7249 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7250 return true;
7251 }
7252
7253 if (subcode == MULT
7254 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7255 {
7256 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7257 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7258 return true;
7259 }
7260
7261 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7262 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7263 {
7264 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7265 if (REG_P (XEXP (XEXP (x, 1), 0))
7266 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7267 *total += COSTS_N_INSNS (1);
7268
7269 return true;
7270 }
7271
7272 /* Fall through */
7273
7274 case PLUS:
7275 if (code == PLUS && arm_arch6 && mode == SImode
7276 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7277 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7278 {
7279 *total = COSTS_N_INSNS (1);
7280 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7281 0, speed);
7282 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7283 return true;
7284 }
7285
7286 /* MLA: All arguments must be registers. We filter out
7287 multiplication by a power of two, so that we fall down into
7288 the code below. */
7289 if (GET_CODE (XEXP (x, 0)) == MULT
7290 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7291 {
7292 /* The cost comes from the cost of the multiply. */
7293 return false;
7294 }
7295
7296 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7297 {
7298 if (TARGET_HARD_FLOAT
7299 && (mode == SFmode
7300 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7301 {
7302 *total = COSTS_N_INSNS (1);
7303 if (CONST_DOUBLE_P (XEXP (x, 1))
7304 && arm_const_double_rtx (XEXP (x, 1)))
7305 {
7306 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7307 return true;
7308 }
7309
7310 return false;
7311 }
7312
7313 *total = COSTS_N_INSNS (20);
7314 return false;
7315 }
7316
7317 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7318 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7319 {
7320 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7321 if (REG_P (XEXP (XEXP (x, 0), 0))
7322 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7323 *total += COSTS_N_INSNS (1);
7324 return true;
7325 }
7326
7327 /* Fall through */
7328
7329 case AND: case XOR: case IOR:
7330
7331 /* Normally the frame registers will be split into reg+const during
7332 reload, so it is a bad idea to combine them with other instructions,
7333 since then they might not be moved outside of loops. As a compromise
7334 we allow integration with ops that have a constant as their second
7335 operand. */
7336 if (REG_OR_SUBREG_REG (XEXP (x, 0))
7337 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7338 && !CONST_INT_P (XEXP (x, 1)))
7339 *total = COSTS_N_INSNS (1);
7340
7341 if (mode == DImode)
7342 {
7343 *total += COSTS_N_INSNS (2);
7344 if (CONST_INT_P (XEXP (x, 1))
7345 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7346 {
7347 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7348 return true;
7349 }
7350
7351 return false;
7352 }
7353
7354 *total += COSTS_N_INSNS (1);
7355 if (CONST_INT_P (XEXP (x, 1))
7356 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7357 {
7358 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7359 return true;
7360 }
7361 subcode = GET_CODE (XEXP (x, 0));
7362 if (subcode == ASHIFT || subcode == ASHIFTRT
7363 || subcode == LSHIFTRT
7364 || subcode == ROTATE || subcode == ROTATERT)
7365 {
7366 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7367 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7368 return true;
7369 }
7370
7371 if (subcode == MULT
7372 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7373 {
7374 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7375 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7376 return true;
7377 }
7378
7379 if (subcode == UMIN || subcode == UMAX
7380 || subcode == SMIN || subcode == SMAX)
7381 {
7382 *total = COSTS_N_INSNS (3);
7383 return true;
7384 }
7385
7386 return false;
7387
7388 case MULT:
7389 /* This should have been handled by the CPU specific routines. */
7390 gcc_unreachable ();
7391
7392 case TRUNCATE:
7393 if (arm_arch3m && mode == SImode
7394 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7395 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7396 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7397 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7398 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7399 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7400 {
7401 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7402 return true;
7403 }
7404 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7405 return false;
7406
7407 case NEG:
7408 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7409 {
7410 if (TARGET_HARD_FLOAT
7411 && (mode == SFmode
7412 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7413 {
7414 *total = COSTS_N_INSNS (1);
7415 return false;
7416 }
7417 *total = COSTS_N_INSNS (2);
7418 return false;
7419 }
7420
7421 /* Fall through */
7422 case NOT:
7423 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7424 if (mode == SImode && code == NOT)
7425 {
7426 subcode = GET_CODE (XEXP (x, 0));
7427 if (subcode == ASHIFT || subcode == ASHIFTRT
7428 || subcode == LSHIFTRT
7429 || subcode == ROTATE || subcode == ROTATERT
7430 || (subcode == MULT
7431 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7432 {
7433 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7434 /* Register shifts cost an extra cycle. */
7435 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
7436 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7437 subcode, 1, speed);
7438 return true;
7439 }
7440 }
7441
7442 return false;
7443
7444 case IF_THEN_ELSE:
7445 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7446 {
7447 *total = COSTS_N_INSNS (4);
7448 return true;
7449 }
7450
7451 operand = XEXP (x, 0);
7452
7453 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7454 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7455 && REG_P (XEXP (operand, 0))
7456 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7457 *total += COSTS_N_INSNS (1);
7458 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
7459 + rtx_cost (XEXP (x, 2), code, 2, speed));
7460 return true;
7461
7462 case NE:
7463 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7464 {
7465 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7466 return true;
7467 }
7468 goto scc_insn;
7469
7470 case GE:
7471 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
7472 && mode == SImode && XEXP (x, 1) == const0_rtx)
7473 {
7474 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7475 return true;
7476 }
7477 goto scc_insn;
7478
7479 case LT:
7480 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
7481 && mode == SImode && XEXP (x, 1) == const0_rtx)
7482 {
7483 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7484 return true;
7485 }
7486 goto scc_insn;
7487
7488 case EQ:
7489 case GT:
7490 case LE:
7491 case GEU:
7492 case LTU:
7493 case GTU:
7494 case LEU:
7495 case UNORDERED:
7496 case ORDERED:
7497 case UNEQ:
7498 case UNGE:
7499 case UNLT:
7500 case UNGT:
7501 case UNLE:
7502 scc_insn:
7503 /* SCC insns. If the comparison has already been
7504 performed, they cost 2 instructions. Otherwise they need
7505 an additional comparison before them. */
7506 *total = COSTS_N_INSNS (2);
7507 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
7508 {
7509 return true;
7510 }
7511
7512 /* Fall through */
7513 case COMPARE:
7514 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
7515 {
7516 *total = 0;
7517 return true;
7518 }
7519
7520 *total += COSTS_N_INSNS (1);
7521 if (CONST_INT_P (XEXP (x, 1))
7522 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7523 {
7524 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7525 return true;
7526 }
7527
7528 subcode = GET_CODE (XEXP (x, 0));
7529 if (subcode == ASHIFT || subcode == ASHIFTRT
7530 || subcode == LSHIFTRT
7531 || subcode == ROTATE || subcode == ROTATERT)
7532 {
7533 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7534 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7535 return true;
7536 }
7537
7538 if (subcode == MULT
7539 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7540 {
7541 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7542 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7543 return true;
7544 }
7545
7546 return false;
7547
7548 case UMIN:
7549 case UMAX:
7550 case SMIN:
7551 case SMAX:
7552 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7553 if (!CONST_INT_P (XEXP (x, 1))
7554 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7555 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7556 return true;
7557
7558 case ABS:
7559 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7560 {
7561 if (TARGET_HARD_FLOAT
7562 && (mode == SFmode
7563 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7564 {
7565 *total = COSTS_N_INSNS (1);
7566 return false;
7567 }
7568 *total = COSTS_N_INSNS (20);
7569 return false;
7570 }
7571 *total = COSTS_N_INSNS (1);
7572 if (mode == DImode)
7573 *total += COSTS_N_INSNS (3);
7574 return false;
7575
7576 case SIGN_EXTEND:
7577 case ZERO_EXTEND:
7578 *total = 0;
7579 if (GET_MODE_CLASS (mode) == MODE_INT)
7580 {
7581 rtx op = XEXP (x, 0);
7582 enum machine_mode opmode = GET_MODE (op);
7583
7584 if (mode == DImode)
7585 *total += COSTS_N_INSNS (1);
7586
7587 if (opmode != SImode)
7588 {
7589 if (MEM_P (op))
7590 {
7591 /* If !arm_arch4, we use one of the extendhisi2_mem
7592 or movhi_bytes patterns for HImode. For a QImode
7593 sign extension, we first zero-extend from memory
7594 and then perform a shift sequence. */
7595 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7596 *total += COSTS_N_INSNS (2);
7597 }
7598 else if (arm_arch6)
7599 *total += COSTS_N_INSNS (1);
7600
7601 /* We don't have the necessary insn, so we need to perform some
7602 other operation. */
7603 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7604 /* An and with constant 255. */
7605 *total += COSTS_N_INSNS (1);
7606 else
7607 /* A shift sequence. Increase costs slightly to avoid
7608 combining two shifts into an extend operation. */
7609 *total += COSTS_N_INSNS (2) + 1;
7610 }
7611
7612 return false;
7613 }
7614
7615 switch (GET_MODE (XEXP (x, 0)))
7616 {
7617 case V8QImode:
7618 case V4HImode:
7619 case V2SImode:
7620 case V4QImode:
7621 case V2HImode:
7622 *total = COSTS_N_INSNS (1);
7623 return false;
7624
7625 default:
7626 gcc_unreachable ();
7627 }
7628 gcc_unreachable ();
7629
7630 case ZERO_EXTRACT:
7631 case SIGN_EXTRACT:
7632 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7633 return true;
7634
7635 case CONST_INT:
7636 if (const_ok_for_arm (INTVAL (x))
7637 || const_ok_for_arm (~INTVAL (x)))
7638 *total = COSTS_N_INSNS (1);
7639 else
7640 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7641 INTVAL (x), NULL_RTX,
7642 NULL_RTX, 0, 0));
7643 return true;
7644
7645 case CONST:
7646 case LABEL_REF:
7647 case SYMBOL_REF:
7648 *total = COSTS_N_INSNS (3);
7649 return true;
7650
7651 case HIGH:
7652 *total = COSTS_N_INSNS (1);
7653 return true;
7654
7655 case LO_SUM:
7656 *total = COSTS_N_INSNS (1);
7657 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7658 return true;
7659
7660 case CONST_DOUBLE:
7661 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7662 && (mode == SFmode || !TARGET_VFP_SINGLE))
7663 *total = COSTS_N_INSNS (1);
7664 else
7665 *total = COSTS_N_INSNS (4);
7666 return true;
7667
7668 case SET:
7669 return false;
7670
7671 case UNSPEC:
7672 /* We cost this as high as our memory costs to allow this to
7673 be hoisted from loops. */
7674 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
7675 {
7676 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7677 }
7678 return true;
7679
7680 case CONST_VECTOR:
7681 if (TARGET_NEON
7682 && TARGET_HARD_FLOAT
7683 && outer == SET
7684 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
7685 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
7686 *total = COSTS_N_INSNS (1);
7687 else
7688 *total = COSTS_N_INSNS (4);
7689 return true;
7690
7691 default:
7692 *total = COSTS_N_INSNS (4);
7693 return false;
7694 }
7695 }
7696
7697 /* Estimates the size cost of Thumb-1 instructions.
7698 For now, most of the code is copied from thumb1_rtx_costs. We need more
7699 fine-grained tuning when we have more related test cases. */
7700 static inline int
7701 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7702 {
7703 enum machine_mode mode = GET_MODE (x);
7704
7705 switch (code)
7706 {
7707 case ASHIFT:
7708 case ASHIFTRT:
7709 case LSHIFTRT:
7710 case ROTATERT:
7711 case PLUS:
7712 case MINUS:
7713 case COMPARE:
7714 case NEG:
7715 case NOT:
7716 return COSTS_N_INSNS (1);
7717
7718 case MULT:
7719 if (CONST_INT_P (XEXP (x, 1)))
7720 {
7721 /* The Thumb-1 mul instruction can't operate on a constant. We must
7722 load it into a register first. */
7723 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7724 return COSTS_N_INSNS (1) + const_size;
7725 }
7726 return COSTS_N_INSNS (1);
7727
7728 case SET:
7729 return (COSTS_N_INSNS (1)
7730 + 4 * ((MEM_P (SET_SRC (x)))
7731 + MEM_P (SET_DEST (x))));
7732
7733 case CONST_INT:
7734 if (outer == SET)
7735 {
7736 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7737 return COSTS_N_INSNS (1);
7738 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7739 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7740 return COSTS_N_INSNS (2);
7741 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7742 if (thumb_shiftable_const (INTVAL (x)))
7743 return COSTS_N_INSNS (2);
7744 return COSTS_N_INSNS (3);
7745 }
7746 else if ((outer == PLUS || outer == COMPARE)
7747 && INTVAL (x) < 256 && INTVAL (x) > -256)
7748 return 0;
7749 else if ((outer == IOR || outer == XOR || outer == AND)
7750 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7751 return COSTS_N_INSNS (1);
7752 else if (outer == AND)
7753 {
7754 int i;
7755 /* This duplicates the tests in the andsi3 expander. */
7756 for (i = 9; i <= 31; i++)
7757 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7758 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7759 return COSTS_N_INSNS (2);
7760 }
7761 else if (outer == ASHIFT || outer == ASHIFTRT
7762 || outer == LSHIFTRT)
7763 return 0;
7764 return COSTS_N_INSNS (2);
7765
7766 case CONST:
7767 case CONST_DOUBLE:
7768 case LABEL_REF:
7769 case SYMBOL_REF:
7770 return COSTS_N_INSNS (3);
7771
7772 case UDIV:
7773 case UMOD:
7774 case DIV:
7775 case MOD:
7776 return 100;
7777
7778 case TRUNCATE:
7779 return 99;
7780
7781 case AND:
7782 case XOR:
7783 case IOR:
7784 /* XXX guess. */
7785 return 8;
7786
7787 case MEM:
7788 /* XXX another guess. */
7789 /* Memory costs quite a lot for the first word, but subsequent words
7790 load at the equivalent of a single insn each. */
7791 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7792 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7793 ? 4 : 0));
7794
7795 case IF_THEN_ELSE:
7796 /* XXX a guess. */
7797 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7798 return 14;
7799 return 2;
7800
7801 case ZERO_EXTEND:
7802 /* XXX still guessing. */
7803 switch (GET_MODE (XEXP (x, 0)))
7804 {
7805 case QImode:
7806 return (1 + (mode == DImode ? 4 : 0)
7807 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
7808
7809 case HImode:
7810 return (4 + (mode == DImode ? 4 : 0)
7811 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
7812
7813 case SImode:
7814 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
7815
7816 default:
7817 return 99;
7818 }
7819
7820 default:
7821 return 99;
7822 }
7823 }
7824
7825 /* RTX costs when optimizing for size. */
7826 static bool
7827 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7828 int *total)
7829 {
7830 enum machine_mode mode = GET_MODE (x);
7831 if (TARGET_THUMB1)
7832 {
7833 *total = thumb1_size_rtx_costs (x, code, outer_code);
7834 return true;
7835 }
7836
7837 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7838 switch (code)
7839 {
7840 case MEM:
7841 /* A memory access costs 1 insn if the mode is small or the address is
7842 a single register; otherwise it costs one insn per word. */
7843 if (REG_P (XEXP (x, 0)))
7844 *total = COSTS_N_INSNS (1);
7845 else if (flag_pic
7846 && GET_CODE (XEXP (x, 0)) == PLUS
7847 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7848 /* This will be split into two instructions.
7849 See arm.md:calculate_pic_address. */
7850 *total = COSTS_N_INSNS (2);
7851 else
7852 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7853 return true;
7854
7855 case DIV:
7856 case MOD:
7857 case UDIV:
7858 case UMOD:
7859 /* Needs a libcall, so it costs about this. */
7860 *total = COSTS_N_INSNS (2);
7861 return false;
7862
7863 case ROTATE:
7864 if (mode == SImode && REG_P (XEXP (x, 1)))
7865 {
7866 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
7867 return true;
7868 }
7869 /* Fall through */
7870 case ROTATERT:
7871 case ASHIFT:
7872 case LSHIFTRT:
7873 case ASHIFTRT:
7874 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
7875 {
7876 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
7877 return true;
7878 }
7879 else if (mode == SImode)
7880 {
7881 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
7882 /* Slightly disparage register shifts, but not by much. */
7883 if (!CONST_INT_P (XEXP (x, 1)))
7884 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
7885 return true;
7886 }
7887
7888 /* Needs a libcall. */
7889 *total = COSTS_N_INSNS (2);
7890 return false;
7891
7892 case MINUS:
7893 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7894 && (mode == SFmode || !TARGET_VFP_SINGLE))
7895 {
7896 *total = COSTS_N_INSNS (1);
7897 return false;
7898 }
7899
7900 if (mode == SImode)
7901 {
7902 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7903 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7904
7905 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7906 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7907 || subcode1 == ROTATE || subcode1 == ROTATERT
7908 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7909 || subcode1 == ASHIFTRT)
7910 {
7911 /* It's just the cost of the two operands. */
7912 *total = 0;
7913 return false;
7914 }
7915
7916 *total = COSTS_N_INSNS (1);
7917 return false;
7918 }
7919
7920 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7921 return false;
7922
7923 case PLUS:
7924 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7925 && (mode == SFmode || !TARGET_VFP_SINGLE))
7926 {
7927 *total = COSTS_N_INSNS (1);
7928 return false;
7929 }
7930
7931 /* A shift as a part of ADD costs nothing. */
7932 if (GET_CODE (XEXP (x, 0)) == MULT
7933 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7934 {
7935 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7936 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
7937 *total += rtx_cost (XEXP (x, 1), code, 1, false);
7938 return true;
7939 }
7940
7941 /* Fall through */
7942 case AND: case XOR: case IOR:
7943 if (mode == SImode)
7944 {
7945 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7946
7947 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7948 || subcode == LSHIFTRT || subcode == ASHIFTRT
7949 || (code == AND && subcode == NOT))
7950 {
7951 /* It's just the cost of the two operands. */
7952 *total = 0;
7953 return false;
7954 }
7955 }
7956
7957 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7958 return false;
7959
7960 case MULT:
7961 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7962 return false;
7963
7964 case NEG:
7965 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7966 && (mode == SFmode || !TARGET_VFP_SINGLE))
7967 {
7968 *total = COSTS_N_INSNS (1);
7969 return false;
7970 }
7971
7972 /* Fall through */
7973 case NOT:
7974 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7975
7976 return false;
7977
7978 case IF_THEN_ELSE:
7979 *total = 0;
7980 return false;
7981
7982 case COMPARE:
7983 if (cc_register (XEXP (x, 0), VOIDmode))
7984 * total = 0;
7985 else
7986 *total = COSTS_N_INSNS (1);
7987 return false;
7988
7989 case ABS:
7990 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7991 && (mode == SFmode || !TARGET_VFP_SINGLE))
7992 *total = COSTS_N_INSNS (1);
7993 else
7994 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7995 return false;
7996
7997 case SIGN_EXTEND:
7998 case ZERO_EXTEND:
7999 return arm_rtx_costs_1 (x, outer_code, total, 0);
8000
8001 case CONST_INT:
8002 if (const_ok_for_arm (INTVAL (x)))
8003 /* A multiplication by a constant requires another instruction
8004 to load the constant to a register. */
8005 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
8006 ? 1 : 0);
8007 else if (const_ok_for_arm (~INTVAL (x)))
8008 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
8009 else if (const_ok_for_arm (-INTVAL (x)))
8010 {
8011 if (outer_code == COMPARE || outer_code == PLUS
8012 || outer_code == MINUS)
8013 *total = 0;
8014 else
8015 *total = COSTS_N_INSNS (1);
8016 }
8017 else
8018 *total = COSTS_N_INSNS (2);
8019 return true;
8020
8021 case CONST:
8022 case LABEL_REF:
8023 case SYMBOL_REF:
8024 *total = COSTS_N_INSNS (2);
8025 return true;
8026
8027 case CONST_DOUBLE:
8028 *total = COSTS_N_INSNS (4);
8029 return true;
8030
8031 case CONST_VECTOR:
8032 if (TARGET_NEON
8033 && TARGET_HARD_FLOAT
8034 && outer_code == SET
8035 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8036 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8037 *total = COSTS_N_INSNS (1);
8038 else
8039 *total = COSTS_N_INSNS (4);
8040 return true;
8041
8042 case HIGH:
8043 case LO_SUM:
8044 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8045 cost of these slightly. */
8046 *total = COSTS_N_INSNS (1) + 1;
8047 return true;
8048
8049 case SET:
8050 return false;
8051
8052 default:
8053 if (mode != VOIDmode)
8054 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8055 else
8056 *total = COSTS_N_INSNS (4); /* Who knows? */
8057 return false;
8058 }
8059 }
8060
8061 /* RTX costs. Dispatch to the size or speed variant as appropriate. */
8062 static bool
8063 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
8064 int *total, bool speed)
8065 {
8066 if (!speed)
8067 return arm_size_rtx_costs (x, (enum rtx_code) code,
8068 (enum rtx_code) outer_code, total);
8069 else
8070 return current_tune->rtx_costs (x, (enum rtx_code) code,
8071 (enum rtx_code) outer_code,
8072 total, speed);
8073 }
8074
8075 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
8076 supported on any "slowmul" cores, so it can be ignored. */
8077
8078 static bool
8079 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8080 int *total, bool speed)
8081 {
8082 enum machine_mode mode = GET_MODE (x);
8083
8084 if (TARGET_THUMB)
8085 {
8086 *total = thumb1_rtx_costs (x, code, outer_code);
8087 return true;
8088 }
8089
8090 switch (code)
8091 {
8092 case MULT:
8093 if (GET_MODE_CLASS (mode) == MODE_FLOAT
8094 || mode == DImode)
8095 {
8096 *total = COSTS_N_INSNS (20);
8097 return false;
8098 }
8099
8100 if (CONST_INT_P (XEXP (x, 1)))
8101 {
8102 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8103 & (unsigned HOST_WIDE_INT) 0xffffffff);
8104 int cost, const_ok = const_ok_for_arm (i);
8105 int j, booth_unit_size;
8106
8107 /* Tune as appropriate. */
8108 cost = const_ok ? 4 : 8;
8109 booth_unit_size = 2;
8110 for (j = 0; i && j < 32; j += booth_unit_size)
8111 {
8112 i >>= booth_unit_size;
8113 cost++;
8114 }
8115
8116 *total = COSTS_N_INSNS (cost);
8117 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8118 return true;
8119 }
8120
8121 *total = COSTS_N_INSNS (20);
8122 return false;
8123
8124 default:
8125 return arm_rtx_costs_1 (x, outer_code, total, speed);
8126 }
8127 }
8128
8129
8130 /* RTX cost for cores with a fast multiply unit (M variants). */
8131
8132 static bool
8133 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8134 int *total, bool speed)
8135 {
8136 enum machine_mode mode = GET_MODE (x);
8137
8138 if (TARGET_THUMB1)
8139 {
8140 *total = thumb1_rtx_costs (x, code, outer_code);
8141 return true;
8142 }
8143
8144 /* ??? Should Thumb-2 use different costs? */
8145 switch (code)
8146 {
8147 case MULT:
8148 /* There is no point basing this on the tuning, since it is always the
8149 fast variant if it exists at all. */
8150 if (mode == DImode
8151 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8152 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8153 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8154 {
8155 *total = COSTS_N_INSNS (2);
8156 return false;
8157 }
8158
8159
8160 if (mode == DImode)
8161 {
8162 *total = COSTS_N_INSNS (5);
8163 return false;
8164 }
8165
8166 if (CONST_INT_P (XEXP (x, 1)))
8167 {
8168 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8169 & (unsigned HOST_WIDE_INT) 0xffffffff);
8170 int cost, const_ok = const_ok_for_arm (i);
8171 int j, booth_unit_size;
8172
8173 /* Tune as appropriate. */
8174 cost = const_ok ? 4 : 8;
8175 booth_unit_size = 8;
8176 for (j = 0; i && j < 32; j += booth_unit_size)
8177 {
8178 i >>= booth_unit_size;
8179 cost++;
8180 }
8181
8182 *total = COSTS_N_INSNS (cost);
8183 return false;
8184 }
8185
8186 if (mode == SImode)
8187 {
8188 *total = COSTS_N_INSNS (4);
8189 return false;
8190 }
8191
8192 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8193 {
8194 if (TARGET_HARD_FLOAT
8195 && (mode == SFmode
8196 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8197 {
8198 *total = COSTS_N_INSNS (1);
8199 return false;
8200 }
8201 }
8202
8203 /* Requires a lib call */
8204 *total = COSTS_N_INSNS (20);
8205 return false;
8206
8207 default:
8208 return arm_rtx_costs_1 (x, outer_code, total, speed);
8209 }
8210 }
8211
8212
8213 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
8214 so it can be ignored. */
8215
8216 static bool
8217 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8218 int *total, bool speed)
8219 {
8220 enum machine_mode mode = GET_MODE (x);
8221
8222 if (TARGET_THUMB)
8223 {
8224 *total = thumb1_rtx_costs (x, code, outer_code);
8225 return true;
8226 }
8227
8228 switch (code)
8229 {
8230 case COMPARE:
8231 if (GET_CODE (XEXP (x, 0)) != MULT)
8232 return arm_rtx_costs_1 (x, outer_code, total, speed);
8233
8234 /* A COMPARE of a MULT is slow on XScale; the muls instruction
8235 will stall until the multiplication is complete. */
8236 *total = COSTS_N_INSNS (3);
8237 return false;
8238
8239 case MULT:
8240 /* There is no point basing this on the tuning, since it is always the
8241 fast variant if it exists at all. */
8242 if (mode == DImode
8243 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8244 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8245 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8246 {
8247 *total = COSTS_N_INSNS (2);
8248 return false;
8249 }
8250
8251
8252 if (mode == DImode)
8253 {
8254 *total = COSTS_N_INSNS (5);
8255 return false;
8256 }
8257
8258 if (CONST_INT_P (XEXP (x, 1)))
8259 {
8260 /* If operand 1 is a constant we can more accurately
8261 calculate the cost of the multiply. The multiplier can
8262 retire 15 bits on the first cycle and a further 12 on the
8263 second. We do, of course, have to load the constant into
8264 a register first. */
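/* An illustrative worked case (not from the original source): for
   i = 0x00012345, i & 0xffff8000 = 0x00010000 which is non-zero, so
   cost becomes 2; then i & 0xf8000000 = 0, so no further increment
   and the result is COSTS_N_INSNS (2).  */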
8265 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8266 /* There's a general overhead of one cycle. */
8267 int cost = 1;
8268 unsigned HOST_WIDE_INT masked_const;
8269
8270 if (i & 0x80000000)
8271 i = ~i;
8272
8273 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
8274
8275 masked_const = i & 0xffff8000;
8276 if (masked_const != 0)
8277 {
8278 cost++;
8279 masked_const = i & 0xf8000000;
8280 if (masked_const != 0)
8281 cost++;
8282 }
8283 *total = COSTS_N_INSNS (cost);
8284 return false;
8285 }
8286
8287 if (mode == SImode)
8288 {
8289 *total = COSTS_N_INSNS (3);
8290 return false;
8291 }
8292
8293 /* Requires a lib call */
8294 *total = COSTS_N_INSNS (20);
8295 return false;
8296
8297 default:
8298 return arm_rtx_costs_1 (x, outer_code, total, speed);
8299 }
8300 }
8301
8302
8303 /* RTX costs for 9e (and later) cores. */
8304
8305 static bool
8306 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8307 int *total, bool speed)
8308 {
8309 enum machine_mode mode = GET_MODE (x);
8310
8311 if (TARGET_THUMB1)
8312 {
8313 switch (code)
8314 {
8315 case MULT:
8316 *total = COSTS_N_INSNS (3);
8317 return true;
8318
8319 default:
8320 *total = thumb1_rtx_costs (x, code, outer_code);
8321 return true;
8322 }
8323 }
8324
8325 switch (code)
8326 {
8327 case MULT:
8328 /* There is no point basing this on the tuning, since it is always the
8329 fast variant if it exists at all. */
8330 if (mode == DImode
8331 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8332 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8333 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8334 {
8335 *total = COSTS_N_INSNS (2);
8336 return false;
8337 }
8338
8339
8340 if (mode == DImode)
8341 {
8342 *total = COSTS_N_INSNS (5);
8343 return false;
8344 }
8345
8346 if (mode == SImode)
8347 {
8348 *total = COSTS_N_INSNS (2);
8349 return false;
8350 }
8351
8352 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8353 {
8354 if (TARGET_HARD_FLOAT
8355 && (mode == SFmode
8356 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8357 {
8358 *total = COSTS_N_INSNS (1);
8359 return false;
8360 }
8361 }
8362
8363 *total = COSTS_N_INSNS (20);
8364 return false;
8365
8366 default:
8367 return arm_rtx_costs_1 (x, outer_code, total, speed);
8368 }
8369 }
8370 /* All address computations that can be done are free, but rtx cost returns
8371 the same for practically all of them. So we weight the different types
8372 of address here in the order (most pref first):
8373 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
8374 static inline int
8375 arm_arm_address_cost (rtx x)
8376 {
8377 enum rtx_code c = GET_CODE (x);
8378
8379 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
8380 return 0;
8381 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
8382 return 10;
8383
8384 if (c == PLUS)
8385 {
8386 if (CONST_INT_P (XEXP (x, 1)))
8387 return 2;
8388
8389 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
8390 return 3;
8391
8392 return 4;
8393 }
8394
8395 return 6;
8396 }
8397
8398 static inline int
8399 arm_thumb_address_cost (rtx x)
8400 {
8401 enum rtx_code c = GET_CODE (x);
8402
8403 if (c == REG)
8404 return 1;
8405 if (c == PLUS
8406 && REG_P (XEXP (x, 0))
8407 && CONST_INT_P (XEXP (x, 1)))
8408 return 1;
8409
8410 return 2;
8411 }
8412
8413 static int
8414 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
8415 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
8416 {
8417 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
8418 }
8419
8420 /* Adjust cost hook for XScale. */
8421 static bool
8422 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8423 {
8424 /* Some true dependencies can have a higher cost depending
8425 on precisely how certain input operands are used. */
8426 if (REG_NOTE_KIND(link) == 0
8427 && recog_memoized (insn) >= 0
8428 && recog_memoized (dep) >= 0)
8429 {
8430 int shift_opnum = get_attr_shift (insn);
8431 enum attr_type attr_type = get_attr_type (dep);
8432
8433 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8434 operand for INSN. If we have a shifted input operand and the
8435 instruction we depend on is another ALU instruction, then we may
8436 have to account for an additional stall. */
8437 if (shift_opnum != 0
8438 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
8439 {
8440 rtx shifted_operand;
8441 int opno;
8442
8443 /* Get the shifted operand. */
8444 extract_insn (insn);
8445 shifted_operand = recog_data.operand[shift_opnum];
8446
8447 /* Iterate over all the operands in DEP. If we write an operand
8448 that overlaps with SHIFTED_OPERAND, then we have to increase the
8449 cost of this dependency. */
8450 extract_insn (dep);
8451 preprocess_constraints ();
8452 for (opno = 0; opno < recog_data.n_operands; opno++)
8453 {
8454 /* We can ignore strict inputs. */
8455 if (recog_data.operand_type[opno] == OP_IN)
8456 continue;
8457
8458 if (reg_overlap_mentioned_p (recog_data.operand[opno],
8459 shifted_operand))
8460 {
8461 *cost = 2;
8462 return false;
8463 }
8464 }
8465 }
8466 }
8467 return true;
8468 }
8469
8470 /* Adjust cost hook for Cortex A9. */
8471 static bool
8472 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8473 {
8474 switch (REG_NOTE_KIND (link))
8475 {
8476 case REG_DEP_ANTI:
8477 *cost = 0;
8478 return false;
8479
8480 case REG_DEP_TRUE:
8481 case REG_DEP_OUTPUT:
8482 if (recog_memoized (insn) >= 0
8483 && recog_memoized (dep) >= 0)
8484 {
8485 if (GET_CODE (PATTERN (insn)) == SET)
8486 {
8487 if (GET_MODE_CLASS
8488 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8489 || GET_MODE_CLASS
8490 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8491 {
8492 enum attr_type attr_type_insn = get_attr_type (insn);
8493 enum attr_type attr_type_dep = get_attr_type (dep);
8494
8495 /* By default all dependencies of the form
8496 s0 = s0 <op> s1
8497 s0 = s0 <op> s2
8498 have an extra latency of 1 cycle because
8499 of the input and output dependency in this
8500 case. However, this gets modeled as a true
8501 dependency, hence all these checks. */
8502 if (REG_P (SET_DEST (PATTERN (insn)))
8503 && REG_P (SET_DEST (PATTERN (dep)))
8504 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8505 SET_DEST (PATTERN (dep))))
8506 {
8507 /* FMACS is a special case where the dependent
8508 instruction can be issued 3 cycles before
8509 the normal latency in case of an output
8510 dependency. */
8511 if ((attr_type_insn == TYPE_FMACS
8512 || attr_type_insn == TYPE_FMACD)
8513 && (attr_type_dep == TYPE_FMACS
8514 || attr_type_dep == TYPE_FMACD))
8515 {
8516 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8517 *cost = insn_default_latency (dep) - 3;
8518 else
8519 *cost = insn_default_latency (dep);
8520 return false;
8521 }
8522 else
8523 {
8524 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8525 *cost = insn_default_latency (dep) + 1;
8526 else
8527 *cost = insn_default_latency (dep);
8528 }
8529 return false;
8530 }
8531 }
8532 }
8533 }
8534 break;
8535
8536 default:
8537 gcc_unreachable ();
8538 }
8539
8540 return true;
8541 }
8542
8543 /* Adjust cost hook for FA726TE. */
8544 static bool
8545 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8546 {
8547 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
8548 followed by a predicated one) has a penalty of 3. */
8549 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8550 && recog_memoized (insn) >= 0
8551 && recog_memoized (dep) >= 0
8552 && get_attr_conds (dep) == CONDS_SET)
8553 {
8554 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8555 if (get_attr_conds (insn) == CONDS_USE
8556 && get_attr_type (insn) != TYPE_BRANCH)
8557 {
8558 *cost = 3;
8559 return false;
8560 }
8561
8562 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8563 || get_attr_conds (insn) == CONDS_USE)
8564 {
8565 *cost = 0;
8566 return false;
8567 }
8568 }
8569
8570 return true;
8571 }
8572
8573 /* Implement TARGET_REGISTER_MOVE_COST.
8574
8575 Moves between VFP_REGS and GENERAL_REGS are a single insn, but such
8576 a move is typically more expensive than a single memory access. We set
8577 the cost to less than two memory accesses so that floating-point to
8578 integer conversion does not go through memory. */
8579
8580 int
8581 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
8582 reg_class_t from, reg_class_t to)
8583 {
8584 if (TARGET_32BIT)
8585 {
8586 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
8587 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
8588 return 15;
8589 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
8590 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
8591 return 4;
8592 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
8593 return 20;
8594 else
8595 return 2;
8596 }
8597 else
8598 {
8599 if (from == HI_REGS || to == HI_REGS)
8600 return 4;
8601 else
8602 return 2;
8603 }
8604 }
8605
8606 /* Implement TARGET_MEMORY_MOVE_COST. */
8607
8608 int
8609 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
8610 bool in ATTRIBUTE_UNUSED)
8611 {
8612 if (TARGET_32BIT)
8613 return 10;
8614 else
8615 {
8616 if (GET_MODE_SIZE (mode) < 4)
8617 return 8;
8618 else
8619 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
8620 }
8621 }
8622
8623 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8624 It corrects the value of COST based on the relationship between
8625 INSN and DEP through the dependence LINK. It returns the new
8626 value. There is a per-core adjust_cost hook to adjust scheduler costs,
8627 and it can choose to completely override the generic
8628 adjust_cost function. Only put bits of code into arm_adjust_cost that
8629 are common across all cores. */
8630 static int
8631 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8632 {
8633 rtx i_pat, d_pat;
8634
8635 /* When generating Thumb-1 code, we want to place flag-setting operations
8636 close to a conditional branch which depends on them, so that we can
8637 omit the comparison. */
8638 if (TARGET_THUMB1
8639 && REG_NOTE_KIND (link) == 0
8640 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8641 && recog_memoized (dep) >= 0
8642 && get_attr_conds (dep) == CONDS_SET)
8643 return 0;
8644
8645 if (current_tune->sched_adjust_cost != NULL)
8646 {
8647 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8648 return cost;
8649 }
8650
8651 /* XXX Is this strictly true? */
8652 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8653 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8654 return 0;
8655
8656 /* Call insns don't incur a stall, even if they follow a load. */
8657 if (REG_NOTE_KIND (link) == 0
8658 && CALL_P (insn))
8659 return 1;
8660
8661 if ((i_pat = single_set (insn)) != NULL
8662 && MEM_P (SET_SRC (i_pat))
8663 && (d_pat = single_set (dep)) != NULL
8664 && MEM_P (SET_DEST (d_pat)))
8665 {
8666 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8667 /* This is a load after a store; there is no conflict if the load reads
8668 from a cached area. Assume that loads from the stack and from the
8669 constant pool are cached, and that others will miss. This is a
8670 hack. */
8671
8672 if ((GET_CODE (src_mem) == SYMBOL_REF
8673 && CONSTANT_POOL_ADDRESS_P (src_mem))
8674 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8675 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8676 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8677 return 1;
8678 }
8679
8680 return cost;
8681 }
8682
8683 static int
8684 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
8685 {
8686 if (TARGET_32BIT)
8687 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
8688 else
8689 return (optimize > 0) ? 2 : 0;
8690 }
8691
8692 static int
8693 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
8694 {
8695 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
8696 }
8697
8698 static bool fp_consts_inited = false;
8699
8700 static REAL_VALUE_TYPE value_fp0;
8701
8702 static void
8703 init_fp_table (void)
8704 {
8705 REAL_VALUE_TYPE r;
8706
8707 r = REAL_VALUE_ATOF ("0", DFmode);
8708 value_fp0 = r;
8709 fp_consts_inited = true;
8710 }
8711
8712 /* Return TRUE if rtx X is a valid immediate FP constant. */
8713 int
8714 arm_const_double_rtx (rtx x)
8715 {
8716 REAL_VALUE_TYPE r;
8717
8718 if (!fp_consts_inited)
8719 init_fp_table ();
8720
8721 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8722 if (REAL_VALUE_MINUS_ZERO (r))
8723 return 0;
8724
8725 if (REAL_VALUES_EQUAL (r, value_fp0))
8726 return 1;
8727
8728 return 0;
8729 }
8730
8731 /* VFPv3 has a fairly wide range of representable immediates, formed from
8732 "quarter-precision" floating-point values. These can be evaluated using this
8733 formula (with ^ for exponentiation):
8734
8735 -1^s * n * 2^-r
8736
8737 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8738 16 <= n <= 31 and 0 <= r <= 7.
8739
8740 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8741
8742 - A (most-significant) is the sign bit.
8743 - BCD are the exponent (encoded as r XOR 3).
8744 - EFGH are the mantissa (encoded as n - 16).
8745 */
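
/* An illustrative sketch (not part of the original source): packing the
   fields described above into the 8-bit fconst[sd] index.  The helper
   name is hypothetical.  For example, 1.0 = 16 * 2^-4, so s = 0, n = 16
   and r = 4, giving ((4 ^ 3) << 4) | (16 - 16) = 0x70.  */

static inline int
quarter_precision_index_example (int s, int n, int r)
{
  /* Assumes s is 0 or 1, 16 <= n <= 31 and 0 <= r <= 7, as described
     above.  */
  return (s << 7) | ((r ^ 3) << 4) | (n - 16);
}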
8746
8747 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8748 fconst[sd] instruction, or -1 if X isn't suitable. */
8749 static int
8750 vfp3_const_double_index (rtx x)
8751 {
8752 REAL_VALUE_TYPE r, m;
8753 int sign, exponent;
8754 unsigned HOST_WIDE_INT mantissa, mant_hi;
8755 unsigned HOST_WIDE_INT mask;
8756 HOST_WIDE_INT m1, m2;
8757 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8758
8759 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
8760 return -1;
8761
8762 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8763
8764 /* We can't represent these things, so detect them first. */
8765 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8766 return -1;
8767
8768 /* Extract sign, exponent and mantissa. */
8769 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8770 r = real_value_abs (&r);
8771 exponent = REAL_EXP (&r);
8772 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8773 highest (sign) bit, with a fixed binary point at bit point_pos.
8774 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8775 bits for the mantissa, this may fail (low bits would be lost). */
8776 real_ldexp (&m, &r, point_pos - exponent);
8777 REAL_VALUE_TO_INT (&m1, &m2, m);
8778 mantissa = m1;
8779 mant_hi = m2;
8780
8781 /* If there are bits set in the low part of the mantissa, we can't
8782 represent this value. */
8783 if (mantissa != 0)
8784 return -1;
8785
8786 /* Now make it so that mantissa contains the most-significant bits, and move
8787 the point_pos to indicate that the least-significant bits have been
8788 discarded. */
8789 point_pos -= HOST_BITS_PER_WIDE_INT;
8790 mantissa = mant_hi;
8791
8792 /* We can permit four significant bits of mantissa only, plus a high bit
8793 which is always 1. */
8794 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8795 if ((mantissa & mask) != 0)
8796 return -1;
8797
8798 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8799 mantissa >>= point_pos - 5;
8800
8801 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8802 floating-point immediate zero with Neon using an integer-zero load, but
8803 that case is handled elsewhere.) */
8804 if (mantissa == 0)
8805 return -1;
8806
8807 gcc_assert (mantissa >= 16 && mantissa <= 31);
8808
8809 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8810 normalized significands are in the range [1, 2). (Our mantissa is shifted
8811 left 4 places at this point relative to normalized IEEE754 values). GCC
8812 internally uses [0.5, 1) (see real.c), so the exponent returned from
8813 REAL_EXP must be altered. */
8814 exponent = 5 - exponent;
8815
8816 if (exponent < 0 || exponent > 7)
8817 return -1;
8818
8819 /* Sign, mantissa and exponent are now in the correct form to plug into the
8820 formula described in the comment above. */
8821 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8822 }
8823
8824 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8825 int
8826 vfp3_const_double_rtx (rtx x)
8827 {
8828 if (!TARGET_VFP3)
8829 return 0;
8830
8831 return vfp3_const_double_index (x) != -1;
8832 }
8833
8834 /* Recognize immediates which can be used in various Neon instructions. Legal
8835 immediates are described by the following table (for VMVN variants, the
8836 bitwise inverse of the constant shown is recognized. In either case, VMOV
8837 is output and the correct instruction to use for a given constant is chosen
8838 by the assembler). The constant shown is replicated across all elements of
8839 the destination vector.
8840
8841 insn elems variant constant (binary)
8842 ---- ----- ------- -----------------
8843 vmov i32 0 00000000 00000000 00000000 abcdefgh
8844 vmov i32 1 00000000 00000000 abcdefgh 00000000
8845 vmov i32 2 00000000 abcdefgh 00000000 00000000
8846 vmov i32 3 abcdefgh 00000000 00000000 00000000
8847 vmov i16 4 00000000 abcdefgh
8848 vmov i16 5 abcdefgh 00000000
8849 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8850 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8851 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8852 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8853 vmvn i16 10 00000000 abcdefgh
8854 vmvn i16 11 abcdefgh 00000000
8855 vmov i32 12 00000000 00000000 abcdefgh 11111111
8856 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8857 vmov i32 14 00000000 abcdefgh 11111111 11111111
8858 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8859 vmov i8 16 abcdefgh
8860 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8861 eeeeeeee ffffffff gggggggg hhhhhhhh
8862 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8863 vmov f32 19 00000000 00000000 00000000 00000000
8864
8865 For case 18, B = !b. Representable values are exactly those accepted by
8866 vfp3_const_double_index, but are output as floating-point numbers rather
8867 than indices.
8868
8869 For case 19, we will change it to vmov.i32 when assembling.
8870
8871 Variants 0-5 (inclusive) may also be used as immediates for the second
8872 operand of VORR/VBIC instructions.
8873
8874 The INVERSE argument causes the bitwise inverse of the given operand to be
8875 recognized instead (used for recognizing legal immediates for the VAND/VORN
8876 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8877 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8878 output, rather than the real insns vbic/vorr).
8879
8880 INVERSE makes no difference to the recognition of float vectors.
8881
8882 The return value is the variant of immediate as shown in the above table, or
8883 -1 if the given value doesn't match any of the listed patterns.
8884 */
8885 static int
8886 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8887 rtx *modconst, int *elementwidth)
8888 {
8889 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8890 matches = 1; \
8891 for (i = 0; i < idx; i += (STRIDE)) \
8892 if (!(TEST)) \
8893 matches = 0; \
8894 if (matches) \
8895 { \
8896 immtype = (CLASS); \
8897 elsize = (ELSIZE); \
8898 break; \
8899 }
8900
8901 unsigned int i, elsize = 0, idx = 0, n_elts;
8902 unsigned int innersize;
8903 unsigned char bytes[16];
8904 int immtype = -1, matches;
8905 unsigned int invmask = inverse ? 0xff : 0;
8906 bool vector = GET_CODE (op) == CONST_VECTOR;
8907
8908 if (vector)
8909 {
8910 n_elts = CONST_VECTOR_NUNITS (op);
8911 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8912 }
8913 else
8914 {
8915 n_elts = 1;
8916 if (mode == VOIDmode)
8917 mode = DImode;
8918 innersize = GET_MODE_SIZE (mode);
8919 }
8920
8921 /* Vectors of float constants. */
8922 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8923 {
8924 rtx el0 = CONST_VECTOR_ELT (op, 0);
8925 REAL_VALUE_TYPE r0;
8926
8927 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
8928 return -1;
8929
8930 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8931
8932 for (i = 1; i < n_elts; i++)
8933 {
8934 rtx elt = CONST_VECTOR_ELT (op, i);
8935 REAL_VALUE_TYPE re;
8936
8937 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8938
8939 if (!REAL_VALUES_EQUAL (r0, re))
8940 return -1;
8941 }
8942
8943 if (modconst)
8944 *modconst = CONST_VECTOR_ELT (op, 0);
8945
8946 if (elementwidth)
8947 *elementwidth = 0;
8948
8949 if (el0 == CONST0_RTX (GET_MODE (el0)))
8950 return 19;
8951 else
8952 return 18;
8953 }
8954
8955 /* Splat vector constant out into a byte vector. */
8956 for (i = 0; i < n_elts; i++)
8957 {
8958 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
8959 unsigned HOST_WIDE_INT elpart;
8960 unsigned int part, parts;
8961
8962 if (CONST_INT_P (el))
8963 {
8964 elpart = INTVAL (el);
8965 parts = 1;
8966 }
8967 else if (CONST_DOUBLE_P (el))
8968 {
8969 elpart = CONST_DOUBLE_LOW (el);
8970 parts = 2;
8971 }
8972 else
8973 gcc_unreachable ();
8974
8975 for (part = 0; part < parts; part++)
8976 {
8977 unsigned int byte;
8978 for (byte = 0; byte < innersize; byte++)
8979 {
8980 bytes[idx++] = (elpart & 0xff) ^ invmask;
8981 elpart >>= BITS_PER_UNIT;
8982 }
8983 if (CONST_DOUBLE_P (el))
8984 elpart = CONST_DOUBLE_HIGH (el);
8985 }
8986 }
8987
8988 /* Sanity check. */
8989 gcc_assert (idx == GET_MODE_SIZE (mode));
8990
8991 do
8992 {
8993 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8994 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8995
8996 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8997 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8998
8999 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
9000 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9001
9002 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
9003 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
9004
9005 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
9006
9007 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
9008
9009 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
9010 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9011
9012 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9013 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9014
9015 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
9016 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9017
9018 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
9019 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
9020
9021 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
9022
9023 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
9024
9025 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9026 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9027
9028 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9029 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9030
9031 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
9032 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9033
9034 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
9035 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9036
9037 CHECK (1, 8, 16, bytes[i] == bytes[0]);
9038
9039 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
9040 && bytes[i] == bytes[(i + 8) % idx]);
9041 }
9042 while (0);
9043
9044 if (immtype == -1)
9045 return -1;
9046
9047 if (elementwidth)
9048 *elementwidth = elsize;
9049
9050 if (modconst)
9051 {
9052 unsigned HOST_WIDE_INT imm = 0;
9053
9054 /* Un-invert bytes of recognized vector, if necessary. */
9055 if (invmask != 0)
9056 for (i = 0; i < idx; i++)
9057 bytes[i] ^= invmask;
9058
9059 if (immtype == 17)
9060 {
9061 /* FIXME: Broken on 32-bit H_W_I hosts. */
9062 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
9063
9064 for (i = 0; i < 8; i++)
9065 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
9066 << (i * BITS_PER_UNIT);
9067
9068 *modconst = GEN_INT (imm);
9069 }
9070 else
9071 {
9072 unsigned HOST_WIDE_INT imm = 0;
9073
9074 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
9075 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
9076
9077 *modconst = GEN_INT (imm);
9078 }
9079 }
9080
9081 return immtype;
9082 #undef CHECK
9083 }
9084
9085 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
9086 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
9087 float elements), and a modified constant (whatever should be output for a
9088 VMOV) in *MODCONST. */
9089
9090 int
9091 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
9092 rtx *modconst, int *elementwidth)
9093 {
9094 rtx tmpconst;
9095 int tmpwidth;
9096 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
9097
9098 if (retval == -1)
9099 return 0;
9100
9101 if (modconst)
9102 *modconst = tmpconst;
9103
9104 if (elementwidth)
9105 *elementwidth = tmpwidth;
9106
9107 return 1;
9108 }
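
/* Illustrative example (editor's sketch, not part of the original code):
   a V2SI CONST_VECTOR whose two elements are both 0x55 splits into the
   per-element byte pattern 0x55, 0, 0, 0, which matches immtype 0 in
   neon_valid_immediate, so this function returns 1 with *ELEMENTWIDTH
   set to 32 and *MODCONST set to (const_int 0x55); such a constant can
   be materialised with a single VMOV.I32.  */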
9109
9110 /* Return TRUE if rtx OP is legal for use in a VORR or VBIC instruction. If
9111 the immediate is valid, write a constant suitable for using as an operand
9112 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
9113 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
9114
9115 int
9116 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
9117 rtx *modconst, int *elementwidth)
9118 {
9119 rtx tmpconst;
9120 int tmpwidth;
9121 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
9122
9123 if (retval < 0 || retval > 5)
9124 return 0;
9125
9126 if (modconst)
9127 *modconst = tmpconst;
9128
9129 if (elementwidth)
9130 *elementwidth = tmpwidth;
9131
9132 return 1;
9133 }
9134
9135 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
9136 the immediate is valid, write a constant suitable for using as an operand
9137 to VSHR/VSHL to *MODCONST and the corresponding element width to
9138 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether a left or a right shift is
9139 being checked, because the two have different limitations. */
9140
9141 int
9142 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
9143 rtx *modconst, int *elementwidth,
9144 bool isleftshift)
9145 {
9146 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9147 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
9148 unsigned HOST_WIDE_INT last_elt = 0;
9149 unsigned HOST_WIDE_INT maxshift;
9150
9151 /* Split vector constant out into a byte vector. */
9152 for (i = 0; i < n_elts; i++)
9153 {
9154 rtx el = CONST_VECTOR_ELT (op, i);
9155 unsigned HOST_WIDE_INT elpart;
9156
9157 if (CONST_INT_P (el))
9158 elpart = INTVAL (el);
9159 else if (CONST_DOUBLE_P (el))
9160 return 0;
9161 else
9162 gcc_unreachable ();
9163
9164 if (i != 0 && elpart != last_elt)
9165 return 0;
9166
9167 last_elt = elpart;
9168 }
9169
9170 /* Shift less than element size. */
9171 maxshift = innersize * 8;
9172
9173 if (isleftshift)
9174 {
9175 /* Left shift immediate value can be from 0 to <size>-1. */
9176 if (last_elt >= maxshift)
9177 return 0;
9178 }
9179 else
9180 {
9181 /* Right shift immediate value can be from 1 to <size>. */
9182 if (last_elt == 0 || last_elt > maxshift)
9183 return 0;
9184 }
9185
9186 if (elementwidth)
9187 *elementwidth = innersize * 8;
9188
9189 if (modconst)
9190 *modconst = CONST_VECTOR_ELT (op, 0);
9191
9192 return 1;
9193 }
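
/* Worked example (added by the editor for illustration): a V4HI
   CONST_VECTOR with every element equal to 5 is accepted both as a left
   shift count (5 < 16) and as a right shift count (1 <= 5 <= 16); in
   either case *ELEMENTWIDTH is set to 16 and *MODCONST to the first
   element.  A right shift of 0 or a left shift of 16 is rejected.  */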
9194
9195 /* Return a string suitable for output of Neon immediate logic operation
9196 MNEM. */
9197
9198 char *
9199 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
9200 int inverse, int quad)
9201 {
9202 int width, is_valid;
9203 static char templ[40];
9204
9205 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
9206
9207 gcc_assert (is_valid != 0);
9208
9209 if (quad)
9210 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
9211 else
9212 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
9213
9214 return templ;
9215 }
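
/* Example of the produced template (an editor's illustration, not from
   the original sources): for MNEM "vorr" with a 32-bit element width and
   QUAD == 0, the buffer ends up holding "vorr.i32\t%P0, %2"; the operand
   printer then substitutes the actual register and immediate.  */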
9216
9217 /* Return a string suitable for output of Neon immediate shift operation
9218 (VSHR or VSHL) MNEM. */
9219
9220 char *
9221 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
9222 enum machine_mode mode, int quad,
9223 bool isleftshift)
9224 {
9225 int width, is_valid;
9226 static char templ[40];
9227
9228 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
9229 gcc_assert (is_valid != 0);
9230
9231 if (quad)
9232 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
9233 else
9234 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
9235
9236 return templ;
9237 }
9238
9239 /* Output a sequence of pairwise operations to implement a reduction.
9240 NOTE: We do "too much work" here, because pairwise operations work on two
9241 registers-worth of operands in one go. Unfortunately we don't think those
9242 extra calculations can be exploited to do the full operation in fewer steps.
9243 Although all vector elements of the result but the first are ignored, we
9244 actually calculate the same result in each of the elements. An alternative
9245 such as initially loading a vector with zero to use as each of the second
9246 operands would use up an additional register and take an extra instruction,
9247 for no particular gain. */
9248
9249 void
9250 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
9251 rtx (*reduc) (rtx, rtx, rtx))
9252 {
9253 enum machine_mode inner = GET_MODE_INNER (mode);
9254 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
9255 rtx tmpsum = op1;
9256
9257 for (i = parts / 2; i >= 1; i /= 2)
9258 {
9259 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
9260 emit_insn (reduc (dest, tmpsum, tmpsum));
9261 tmpsum = dest;
9262 }
9263 }
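
/* Rough illustration (editor's sketch, assuming REDUC generates a VPADD
   pattern): for a V4HI sum reduction PARTS is 4, so the loop runs with
   i == 2 and i == 1 and emits two pairwise additions, approximately

       vpadd.i16  d_tmp, d_in, d_in
       vpadd.i16  d_out, d_tmp, d_tmp

   after which every lane of d_out holds the full sum, although callers
   only use lane 0.  The register names are hypothetical.  */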
9264
9265 /* If VALS is a vector constant that can be loaded into a register
9266 using VDUP, generate instructions to do so and return an RTX to
9267 assign to the register. Otherwise return NULL_RTX. */
9268
9269 static rtx
9270 neon_vdup_constant (rtx vals)
9271 {
9272 enum machine_mode mode = GET_MODE (vals);
9273 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9274 int n_elts = GET_MODE_NUNITS (mode);
9275 bool all_same = true;
9276 rtx x;
9277 int i;
9278
9279 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
9280 return NULL_RTX;
9281
9282 for (i = 0; i < n_elts; ++i)
9283 {
9284 x = XVECEXP (vals, 0, i);
9285 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9286 all_same = false;
9287 }
9288
9289 if (!all_same)
9290 /* The elements are not all the same. We could handle repeating
9291 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
9292 {0, C, 0, C, 0, C, 0, C} which can be loaded using
9293 vdup.i16). */
9294 return NULL_RTX;
9295
9296 /* We can load this constant by using VDUP and a constant in a
9297 single ARM register. This will be cheaper than a vector
9298 load. */
9299
9300 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9301 return gen_rtx_VEC_DUPLICATE (mode, x);
9302 }
9303
9304 /* Generate code to load VALS, which is a PARALLEL containing only
9305 constants (for vec_init) or CONST_VECTOR, efficiently into a
9306 register. Returns an RTX to copy into the register, or NULL_RTX
9307 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
9308
9309 rtx
9310 neon_make_constant (rtx vals)
9311 {
9312 enum machine_mode mode = GET_MODE (vals);
9313 rtx target;
9314 rtx const_vec = NULL_RTX;
9315 int n_elts = GET_MODE_NUNITS (mode);
9316 int n_const = 0;
9317 int i;
9318
9319 if (GET_CODE (vals) == CONST_VECTOR)
9320 const_vec = vals;
9321 else if (GET_CODE (vals) == PARALLEL)
9322 {
9323 /* A CONST_VECTOR must contain only CONST_INTs and
9324 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9325 Only store valid constants in a CONST_VECTOR. */
9326 for (i = 0; i < n_elts; ++i)
9327 {
9328 rtx x = XVECEXP (vals, 0, i);
9329 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
9330 n_const++;
9331 }
9332 if (n_const == n_elts)
9333 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9334 }
9335 else
9336 gcc_unreachable ();
9337
9338 if (const_vec != NULL
9339 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9340 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9341 return const_vec;
9342 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9343 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9344 pipeline cycle; creating the constant takes one or two ARM
9345 pipeline cycles. */
9346 return target;
9347 else if (const_vec != NULL_RTX)
9348 /* Load from constant pool. On Cortex-A8 this takes two cycles
9349 (for either double or quad vectors). We cannot take advantage
9350 of single-cycle VLD1 because we need a PC-relative addressing
9351 mode. */
9352 return const_vec;
9353 else
9354 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9355 We cannot construct an initializer. */
9356 return NULL_RTX;
9357 }
9358
9359 /* Initialize vector TARGET to VALS. */
9360
9361 void
9362 neon_expand_vector_init (rtx target, rtx vals)
9363 {
9364 enum machine_mode mode = GET_MODE (target);
9365 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9366 int n_elts = GET_MODE_NUNITS (mode);
9367 int n_var = 0, one_var = -1;
9368 bool all_same = true;
9369 rtx x, mem;
9370 int i;
9371
9372 for (i = 0; i < n_elts; ++i)
9373 {
9374 x = XVECEXP (vals, 0, i);
9375 if (!CONSTANT_P (x))
9376 ++n_var, one_var = i;
9377
9378 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9379 all_same = false;
9380 }
9381
9382 if (n_var == 0)
9383 {
9384 rtx constant = neon_make_constant (vals);
9385 if (constant != NULL_RTX)
9386 {
9387 emit_move_insn (target, constant);
9388 return;
9389 }
9390 }
9391
9392 /* Splat a single non-constant element if we can. */
9393 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9394 {
9395 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9396 emit_insn (gen_rtx_SET (VOIDmode, target,
9397 gen_rtx_VEC_DUPLICATE (mode, x)));
9398 return;
9399 }
9400
9401 /* One field is non-constant. Load constant then overwrite varying
9402 field. This is more efficient than using the stack. */
9403 if (n_var == 1)
9404 {
9405 rtx copy = copy_rtx (vals);
9406 rtx index = GEN_INT (one_var);
9407
9408 /* Load constant part of vector, substitute neighboring value for
9409 varying element. */
9410 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9411 neon_expand_vector_init (target, copy);
9412
9413 /* Insert variable. */
9414 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9415 switch (mode)
9416 {
9417 case V8QImode:
9418 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9419 break;
9420 case V16QImode:
9421 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9422 break;
9423 case V4HImode:
9424 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9425 break;
9426 case V8HImode:
9427 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9428 break;
9429 case V2SImode:
9430 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9431 break;
9432 case V4SImode:
9433 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9434 break;
9435 case V2SFmode:
9436 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9437 break;
9438 case V4SFmode:
9439 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9440 break;
9441 case V2DImode:
9442 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9443 break;
9444 default:
9445 gcc_unreachable ();
9446 }
9447 return;
9448 }
9449
9450 /* Construct the vector in memory one field at a time
9451 and load the whole vector. */
9452 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
9453 for (i = 0; i < n_elts; i++)
9454 emit_move_insn (adjust_address_nv (mem, inner_mode,
9455 i * GET_MODE_SIZE (inner_mode)),
9456 XVECEXP (vals, 0, i));
9457 emit_move_insn (target, mem);
9458 }
9459
9460 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
9461 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9462 reported source locations are bogus. */
9463
9464 static void
9465 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9466 const char *err)
9467 {
9468 HOST_WIDE_INT lane;
9469
9470 gcc_assert (CONST_INT_P (operand));
9471
9472 lane = INTVAL (operand);
9473
9474 if (lane < low || lane >= high)
9475 error (err);
9476 }
9477
9478 /* Bounds-check lanes. */
9479
9480 void
9481 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9482 {
9483 bounds_check (operand, low, high, "lane out of range");
9484 }
9485
9486 /* Bounds-check constants. */
9487
9488 void
9489 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9490 {
9491 bounds_check (operand, low, high, "constant out of range");
9492 }
9493
9494 HOST_WIDE_INT
9495 neon_element_bits (enum machine_mode mode)
9496 {
9497 if (mode == DImode)
9498 return GET_MODE_BITSIZE (mode);
9499 else
9500 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
9501 }
9502
9503 \f
9504 /* Predicates for `match_operand' and `match_operator'. */
9505
9506 /* Return TRUE if OP is a valid coprocessor memory address pattern.
9507 WB is true if full writeback address modes are allowed; if it is false,
9508 only the limited writeback address modes POST_INC and PRE_DEC are
9509 allowed. */
9510
9511 int
9512 arm_coproc_mem_operand (rtx op, bool wb)
9513 {
9514 rtx ind;
9515
9516 /* Reject eliminable registers. */
9517 if (! (reload_in_progress || reload_completed)
9518 && ( reg_mentioned_p (frame_pointer_rtx, op)
9519 || reg_mentioned_p (arg_pointer_rtx, op)
9520 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9521 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9522 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9523 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9524 return FALSE;
9525
9526 /* Constants are converted into offsets from labels. */
9527 if (!MEM_P (op))
9528 return FALSE;
9529
9530 ind = XEXP (op, 0);
9531
9532 if (reload_completed
9533 && (GET_CODE (ind) == LABEL_REF
9534 || (GET_CODE (ind) == CONST
9535 && GET_CODE (XEXP (ind, 0)) == PLUS
9536 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9537 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
9538 return TRUE;
9539
9540 /* Match: (mem (reg)). */
9541 if (REG_P (ind))
9542 return arm_address_register_rtx_p (ind, 0);
9543
9544 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
9545 acceptable in any case (subject to verification by
9546 arm_address_register_rtx_p). We need WB to be true to accept
9547 PRE_INC and POST_DEC. */
9548 if (GET_CODE (ind) == POST_INC
9549 || GET_CODE (ind) == PRE_DEC
9550 || (wb
9551 && (GET_CODE (ind) == PRE_INC
9552 || GET_CODE (ind) == POST_DEC)))
9553 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9554
9555 if (wb
9556 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
9557 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
9558 && GET_CODE (XEXP (ind, 1)) == PLUS
9559 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
9560 ind = XEXP (ind, 1);
9561
9562 /* Match:
9563 (plus (reg)
9564 (const)). */
9565 if (GET_CODE (ind) == PLUS
9566 && REG_P (XEXP (ind, 0))
9567 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9568 && CONST_INT_P (XEXP (ind, 1))
9569 && INTVAL (XEXP (ind, 1)) > -1024
9570 && INTVAL (XEXP (ind, 1)) < 1024
9571 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9572 return TRUE;
9573
9574 return FALSE;
9575 }
9576
9577 /* Return TRUE if OP is a memory operand which we can load or store a vector
9578 to/from. TYPE is one of the following values:
9579 0 - Vector load/store (vldr)
9580 1 - Core registers (ldm)
9581 2 - Element/structure loads (vld1)
9582 */
9583 int
9584 neon_vector_mem_operand (rtx op, int type)
9585 {
9586 rtx ind;
9587
9588 /* Reject eliminable registers. */
9589 if (! (reload_in_progress || reload_completed)
9590 && ( reg_mentioned_p (frame_pointer_rtx, op)
9591 || reg_mentioned_p (arg_pointer_rtx, op)
9592 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9593 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9594 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9595 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9596 return FALSE;
9597
9598 /* Constants are converted into offsets from labels. */
9599 if (!MEM_P (op))
9600 return FALSE;
9601
9602 ind = XEXP (op, 0);
9603
9604 if (reload_completed
9605 && (GET_CODE (ind) == LABEL_REF
9606 || (GET_CODE (ind) == CONST
9607 && GET_CODE (XEXP (ind, 0)) == PLUS
9608 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9609 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
9610 return TRUE;
9611
9612 /* Match: (mem (reg)). */
9613 if (REG_P (ind))
9614 return arm_address_register_rtx_p (ind, 0);
9615
9616 /* Allow post-increment with Neon registers. */
9617 if ((type != 1 && GET_CODE (ind) == POST_INC)
9618 || (type == 0 && GET_CODE (ind) == PRE_DEC))
9619 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9620
9621 /* FIXME: vld1 allows register post-modify. */
9622
9623 /* Match:
9624 (plus (reg)
9625 (const)). */
9626 if (type == 0
9627 && GET_CODE (ind) == PLUS
9628 && REG_P (XEXP (ind, 0))
9629 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9630 && CONST_INT_P (XEXP (ind, 1))
9631 && INTVAL (XEXP (ind, 1)) > -1024
9632 /* For quad modes, we restrict the constant offset to be slightly less
9633 than what the instruction format permits. We have no such constraint
9634 on double mode offsets. (This must match arm_legitimate_index_p.) */
9635 && (INTVAL (XEXP (ind, 1))
9636 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
9637 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9638 return TRUE;
9639
9640 return FALSE;
9641 }
9642
9643 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
9644 type. */
9645 int
9646 neon_struct_mem_operand (rtx op)
9647 {
9648 rtx ind;
9649
9650 /* Reject eliminable registers. */
9651 if (! (reload_in_progress || reload_completed)
9652 && ( reg_mentioned_p (frame_pointer_rtx, op)
9653 || reg_mentioned_p (arg_pointer_rtx, op)
9654 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9655 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9656 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9657 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9658 return FALSE;
9659
9660 /* Constants are converted into offsets from labels. */
9661 if (!MEM_P (op))
9662 return FALSE;
9663
9664 ind = XEXP (op, 0);
9665
9666 if (reload_completed
9667 && (GET_CODE (ind) == LABEL_REF
9668 || (GET_CODE (ind) == CONST
9669 && GET_CODE (XEXP (ind, 0)) == PLUS
9670 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9671 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
9672 return TRUE;
9673
9674 /* Match: (mem (reg)). */
9675 if (REG_P (ind))
9676 return arm_address_register_rtx_p (ind, 0);
9677
9678 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
9679 if (GET_CODE (ind) == POST_INC
9680 || GET_CODE (ind) == PRE_DEC)
9681 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9682
9683 return FALSE;
9684 }
9685
9686 /* Return true if X is a register that will be eliminated later on. */
9687 int
9688 arm_eliminable_register (rtx x)
9689 {
9690 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9691 || REGNO (x) == ARG_POINTER_REGNUM
9692 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9693 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9694 }
9695
9696 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
9697 coprocessor registers. Otherwise return NO_REGS. */
9698
9699 enum reg_class
9700 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9701 {
9702 if (mode == HFmode)
9703 {
9704 if (!TARGET_NEON_FP16)
9705 return GENERAL_REGS;
9706 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9707 return NO_REGS;
9708 return GENERAL_REGS;
9709 }
9710
9711 /* The neon move patterns handle all legitimate vector and struct
9712 addresses. */
9713 if (TARGET_NEON
9714 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
9715 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9716 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9717 || VALID_NEON_STRUCT_MODE (mode)))
9718 return NO_REGS;
9719
9720 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9721 return NO_REGS;
9722
9723 return GENERAL_REGS;
9724 }
9725
9726 /* Values which must be returned in the most-significant end of the return
9727 register. */
9728
9729 static bool
9730 arm_return_in_msb (const_tree valtype)
9731 {
9732 return (TARGET_AAPCS_BASED
9733 && BYTES_BIG_ENDIAN
9734 && (AGGREGATE_TYPE_P (valtype)
9735 || TREE_CODE (valtype) == COMPLEX_TYPE
9736 || FIXED_POINT_TYPE_P (valtype)));
9737 }
9738
9739 /* Return TRUE if X references a SYMBOL_REF. */
9740 int
9741 symbol_mentioned_p (rtx x)
9742 {
9743 const char * fmt;
9744 int i;
9745
9746 if (GET_CODE (x) == SYMBOL_REF)
9747 return 1;
9748
9749 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9750 are constant offsets, not symbols. */
9751 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9752 return 0;
9753
9754 fmt = GET_RTX_FORMAT (GET_CODE (x));
9755
9756 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9757 {
9758 if (fmt[i] == 'E')
9759 {
9760 int j;
9761
9762 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9763 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9764 return 1;
9765 }
9766 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9767 return 1;
9768 }
9769
9770 return 0;
9771 }
9772
9773 /* Return TRUE if X references a LABEL_REF. */
9774 int
9775 label_mentioned_p (rtx x)
9776 {
9777 const char * fmt;
9778 int i;
9779
9780 if (GET_CODE (x) == LABEL_REF)
9781 return 1;
9782
9783 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9784 instruction, but they are constant offsets, not symbols. */
9785 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9786 return 0;
9787
9788 fmt = GET_RTX_FORMAT (GET_CODE (x));
9789 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9790 {
9791 if (fmt[i] == 'E')
9792 {
9793 int j;
9794
9795 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9796 if (label_mentioned_p (XVECEXP (x, i, j)))
9797 return 1;
9798 }
9799 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9800 return 1;
9801 }
9802
9803 return 0;
9804 }
9805
9806 int
9807 tls_mentioned_p (rtx x)
9808 {
9809 switch (GET_CODE (x))
9810 {
9811 case CONST:
9812 return tls_mentioned_p (XEXP (x, 0));
9813
9814 case UNSPEC:
9815 if (XINT (x, 1) == UNSPEC_TLS)
9816 return 1;
9817
9818 default:
9819 return 0;
9820 }
9821 }
9822
9823 /* Must not copy any rtx that uses a pc-relative address. */
9824
9825 static int
9826 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9827 {
9828 if (GET_CODE (*x) == UNSPEC
9829 && (XINT (*x, 1) == UNSPEC_PIC_BASE
9830 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
9831 return 1;
9832 return 0;
9833 }
9834
9835 static bool
9836 arm_cannot_copy_insn_p (rtx insn)
9837 {
9838 /* The tls call insn cannot be copied, as it is paired with a data
9839 word. */
9840 if (recog_memoized (insn) == CODE_FOR_tlscall)
9841 return true;
9842
9843 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9844 }
9845
9846 enum rtx_code
9847 minmax_code (rtx x)
9848 {
9849 enum rtx_code code = GET_CODE (x);
9850
9851 switch (code)
9852 {
9853 case SMAX:
9854 return GE;
9855 case SMIN:
9856 return LE;
9857 case UMIN:
9858 return LEU;
9859 case UMAX:
9860 return GEU;
9861 default:
9862 gcc_unreachable ();
9863 }
9864 }
9865
9866 /* Match pair of min/max operators that can be implemented via usat/ssat. */
9867
9868 bool
9869 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
9870 int *mask, bool *signed_sat)
9871 {
9872 /* The high bound must be a power of two minus one. */
9873 int log = exact_log2 (INTVAL (hi_bound) + 1);
9874 if (log == -1)
9875 return false;
9876
9877 /* The low bound is either zero (for usat) or one less than the
9878 negation of the high bound (for ssat). */
9879 if (INTVAL (lo_bound) == 0)
9880 {
9881 if (mask)
9882 *mask = log;
9883 if (signed_sat)
9884 *signed_sat = false;
9885
9886 return true;
9887 }
9888
9889 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
9890 {
9891 if (mask)
9892 *mask = log + 1;
9893 if (signed_sat)
9894 *signed_sat = true;
9895
9896 return true;
9897 }
9898
9899 return false;
9900 }
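
/* Worked examples (added for illustration; not in the original sources):
   bounds [0, 255] give log == 8, so *MASK == 8 and *SIGNED_SAT == false,
   matching USAT #8; bounds [-128, 127] give log == 7, so *MASK == 8 and
   *SIGNED_SAT == true, matching SSAT #8.  Bounds such as [0, 250] are
   rejected because 251 is not a power of two.  */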
9901
9902 /* Return 1 if memory locations are adjacent. */
9903 int
9904 adjacent_mem_locations (rtx a, rtx b)
9905 {
9906 /* We don't guarantee to preserve the order of these memory refs. */
9907 if (volatile_refs_p (a) || volatile_refs_p (b))
9908 return 0;
9909
9910 if ((REG_P (XEXP (a, 0))
9911 || (GET_CODE (XEXP (a, 0)) == PLUS
9912 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
9913 && (REG_P (XEXP (b, 0))
9914 || (GET_CODE (XEXP (b, 0)) == PLUS
9915 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
9916 {
9917 HOST_WIDE_INT val0 = 0, val1 = 0;
9918 rtx reg0, reg1;
9919 int val_diff;
9920
9921 if (GET_CODE (XEXP (a, 0)) == PLUS)
9922 {
9923 reg0 = XEXP (XEXP (a, 0), 0);
9924 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9925 }
9926 else
9927 reg0 = XEXP (a, 0);
9928
9929 if (GET_CODE (XEXP (b, 0)) == PLUS)
9930 {
9931 reg1 = XEXP (XEXP (b, 0), 0);
9932 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9933 }
9934 else
9935 reg1 = XEXP (b, 0);
9936
9937 /* Don't accept any offset that will require multiple
9938 instructions to handle, since this would cause the
9939 arith_adjacentmem pattern to output an overlong sequence. */
9940 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9941 return 0;
9942
9943 /* Don't allow an eliminable register: register elimination can make
9944 the offset too large. */
9945 if (arm_eliminable_register (reg0))
9946 return 0;
9947
9948 val_diff = val1 - val0;
9949
9950 if (arm_ld_sched)
9951 {
9952 /* If the target has load delay slots, then there's no benefit
9953 to using an ldm instruction unless the offset is zero and
9954 we are optimizing for size. */
9955 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9956 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9957 && (val_diff == 4 || val_diff == -4));
9958 }
9959
9960 return ((REGNO (reg0) == REGNO (reg1))
9961 && (val_diff == 4 || val_diff == -4));
9962 }
9963
9964 return 0;
9965 }
9966
9967 /* Return true if OP is a valid load or store multiple operation. LOAD is true
9968 for load operations, false for store operations. CONSECUTIVE is true
9969 if the register numbers in the operation must be consecutive in the register
9970 bank. RETURN_PC is true if the value is to be loaded into the PC.
9971 The pattern we are trying to match for load is:
9972 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
9973 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
9974 :
9975 :
9976 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
9977 ]
9978 where
9979 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
9980 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
9981 3. If consecutive is TRUE, then for kth register being loaded,
9982 REGNO (R_dk) = REGNO (R_d0) + k.
9983 The pattern for store is similar. */
9984 bool
9985 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
9986 bool consecutive, bool return_pc)
9987 {
9988 HOST_WIDE_INT count = XVECLEN (op, 0);
9989 rtx reg, mem, addr;
9990 unsigned regno;
9991 unsigned first_regno;
9992 HOST_WIDE_INT i = 1, base = 0, offset = 0;
9993 rtx elt;
9994 bool addr_reg_in_reglist = false;
9995 bool update = false;
9996 int reg_increment;
9997 int offset_adj;
9998 int regs_per_val;
9999
10000 /* If not in SImode, then registers must be consecutive
10001 (e.g., VLDM instructions for DFmode). */
10002 gcc_assert ((mode == SImode) || consecutive);
10003 /* Setting return_pc for stores is illegal. */
10004 gcc_assert (!return_pc || load);
10005
10006 /* Set up the increments and the regs per val based on the mode. */
10007 reg_increment = GET_MODE_SIZE (mode);
10008 regs_per_val = reg_increment / 4;
10009 offset_adj = return_pc ? 1 : 0;
10010
10011 if (count <= 1
10012 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
10013 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
10014 return false;
10015
10016 /* Check if this is a write-back. */
10017 elt = XVECEXP (op, 0, offset_adj);
10018 if (GET_CODE (SET_SRC (elt)) == PLUS)
10019 {
10020 i++;
10021 base = 1;
10022 update = true;
10023
10024 /* The offset adjustment must be the number of registers being
10025 popped times the size of a single register. */
10026 if (!REG_P (SET_DEST (elt))
10027 || !REG_P (XEXP (SET_SRC (elt), 0))
10028 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
10029 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
10030 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
10031 ((count - 1 - offset_adj) * reg_increment))
10032 return false;
10033 }
10034
10035 i = i + offset_adj;
10036 base = base + offset_adj;
10037 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
10038 success depends on the type: VLDM can do just one reg,
10039 LDM must do at least two. */
10040 if ((count <= i) && (mode == SImode))
10041 return false;
10042
10043 elt = XVECEXP (op, 0, i - 1);
10044 if (GET_CODE (elt) != SET)
10045 return false;
10046
10047 if (load)
10048 {
10049 reg = SET_DEST (elt);
10050 mem = SET_SRC (elt);
10051 }
10052 else
10053 {
10054 reg = SET_SRC (elt);
10055 mem = SET_DEST (elt);
10056 }
10057
10058 if (!REG_P (reg) || !MEM_P (mem))
10059 return false;
10060
10061 regno = REGNO (reg);
10062 first_regno = regno;
10063 addr = XEXP (mem, 0);
10064 if (GET_CODE (addr) == PLUS)
10065 {
10066 if (!CONST_INT_P (XEXP (addr, 1)))
10067 return false;
10068
10069 offset = INTVAL (XEXP (addr, 1));
10070 addr = XEXP (addr, 0);
10071 }
10072
10073 if (!REG_P (addr))
10074 return false;
10075
10076 /* Don't allow SP to be loaded unless it is also the base register. It
10077 guarantees that SP is reset correctly when an LDM instruction
10078 is interrupted. Otherwise, we might end up with a corrupt stack. */
10079 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10080 return false;
10081
10082 for (; i < count; i++)
10083 {
10084 elt = XVECEXP (op, 0, i);
10085 if (GET_CODE (elt) != SET)
10086 return false;
10087
10088 if (load)
10089 {
10090 reg = SET_DEST (elt);
10091 mem = SET_SRC (elt);
10092 }
10093 else
10094 {
10095 reg = SET_SRC (elt);
10096 mem = SET_DEST (elt);
10097 }
10098
10099 if (!REG_P (reg)
10100 || GET_MODE (reg) != mode
10101 || REGNO (reg) <= regno
10102 || (consecutive
10103 && (REGNO (reg) !=
10104 (unsigned int) (first_regno + regs_per_val * (i - base))))
10105 /* Don't allow SP to be loaded unless it is also the base register. It
10106 guarantees that SP is reset correctly when an LDM instruction
10107 is interrupted. Otherwise, we might end up with a corrupt stack. */
10108 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10109 || !MEM_P (mem)
10110 || GET_MODE (mem) != mode
10111 || ((GET_CODE (XEXP (mem, 0)) != PLUS
10112 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
10113 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
10114 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
10115 offset + (i - base) * reg_increment))
10116 && (!REG_P (XEXP (mem, 0))
10117 || offset + (i - base) * reg_increment != 0)))
10118 return false;
10119
10120 regno = REGNO (reg);
10121 if (regno == REGNO (addr))
10122 addr_reg_in_reglist = true;
10123 }
10124
10125 if (load)
10126 {
10127 if (update && addr_reg_in_reglist)
10128 return false;
10129
10130 /* For Thumb-1, the address register is always modified - either by write-back
10131 or by explicit load. If the pattern does not describe an update,
10132 then the address register must be in the list of loaded registers. */
10133 if (TARGET_THUMB1)
10134 return update || addr_reg_in_reglist;
10135 }
10136
10137 return true;
10138 }
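
/* Concrete instance of the pattern described above (editor's sketch in
   informal RTL; register numbers are hypothetical): the two-word load

     (parallel [(set (reg:SI r4) (mem:SI (reg:SI r0)))
                (set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0)
                                                  (const_int 4))))])

   is accepted with LOAD == true, MODE == SImode, CONSECUTIVE == false and
   RETURN_PC == false, since the register numbers ascend and the offsets
   step by the register size.  (On Thumb-1 the base register r0 would
   additionally have to be updated or be among the loaded registers.)  */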
10139
10140 /* Return true iff it would be profitable to turn a sequence of NOPS loads
10141 or stores (depending on IS_STORE) into a load-multiple or store-multiple
10142 instruction. ADD_OFFSET is nonzero if the base address register needs
10143 to be modified with an add instruction before we can use it. */
10144
10145 static bool
10146 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
10147 int nops, HOST_WIDE_INT add_offset)
10148 {
10149 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
10150 if the offset isn't small enough. The reason 2 ldrs are faster
10151 is because these ARMs are able to do more than one cache access
10152 in a single cycle. The ARM9 and StrongARM have Harvard caches,
10153 whilst the ARM8 has a double bandwidth cache. This means that
10154 these cores can do both an instruction fetch and a data fetch in
10155 a single cycle, so the trick of calculating the address into a
10156 scratch register (one of the result regs) and then doing a load
10157 multiple actually becomes slower (and no smaller in code size).
10158 That is the transformation
10159
10160 ldr rd1, [rbase + offset]
10161 ldr rd2, [rbase + offset + 4]
10162
10163 to
10164
10165 add rd1, rbase, offset
10166 ldmia rd1, {rd1, rd2}
10167
10168 produces worse code -- '3 cycles + any stalls on rd2' instead of
10169 '2 cycles + any stalls on rd2'. On ARMs with only one cache
10170 access per cycle, the first sequence could never complete in less
10171 than 6 cycles, whereas the ldm sequence would only take 5 and
10172 would make better use of sequential accesses if not hitting the
10173 cache.
10174
10175 We cheat here and test 'arm_ld_sched' which we currently know to
10176 only be true for the ARM8, ARM9 and StrongARM. If this ever
10177 changes, then the test below needs to be reworked. */
10178 if (nops == 2 && arm_ld_sched && add_offset != 0)
10179 return false;
10180
10181 /* XScale has load-store double instructions, but they have stricter
10182 alignment requirements than load-store multiple, so we cannot
10183 use them.
10184
10185 For XScale ldm requires 2 + NREGS cycles to complete and blocks
10186 the pipeline until completion.
10187
10188 NREGS CYCLES
10189 1 3
10190 2 4
10191 3 5
10192 4 6
10193
10194 An ldr instruction takes 1-3 cycles, but does not block the
10195 pipeline.
10196
10197 NREGS CYCLES
10198 1 1-3
10199 2 2-6
10200 3 3-9
10201 4 4-12
10202
10203 Best case ldr will always win. However, the more ldr instructions
10204 we issue, the less likely we are to be able to schedule them well.
10205 Using ldr instructions also increases code size.
10206
10207 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10208 for counts of 3 or 4 regs. */
10209 if (nops <= 2 && arm_tune_xscale && !optimize_size)
10210 return false;
10211 return true;
10212 }
10213
10214 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10215 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10216 an array ORDER which describes the sequence to use when accessing the
10217 offsets that produces an ascending order. In this sequence, each
10218 offset must be larger by exactly 4 than the previous one. ORDER[0]
10219 must have been filled in with the lowest offset by the caller.
10220 If UNSORTED_REGS is nonnull, it is an array of register numbers that
10221 we use to verify that ORDER produces an ascending order of registers.
10222 Return true if it was possible to construct such an order, false if
10223 not. */
10224
10225 static bool
10226 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
10227 int *unsorted_regs)
10228 {
10229 int i;
10230 for (i = 1; i < nops; i++)
10231 {
10232 int j;
10233
10234 order[i] = order[i - 1];
10235 for (j = 0; j < nops; j++)
10236 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
10237 {
10238 /* We must find exactly one offset that is higher than the
10239 previous one by 4. */
10240 if (order[i] != order[i - 1])
10241 return false;
10242 order[i] = j;
10243 }
10244 if (order[i] == order[i - 1])
10245 return false;
10246 /* The register numbers must be ascending. */
10247 if (unsorted_regs != NULL
10248 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
10249 return false;
10250 }
10251 return true;
10252 }
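
/* Worked example (editor's addition, illustrative only): with NOPS == 4,
   UNSORTED_OFFSETS == {4, 12, 0, 8} and ORDER[0] preset to 2 by the
   caller (the index of the lowest offset), the loop above yields
   ORDER == {2, 0, 3, 1}, i.e. the accesses are visited in offset order
   0, 4, 8, 12.  Offsets {0, 4, 4, 8} would be rejected because two
   candidates exceed the previous offset by exactly 4.  */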
10253
10254 /* Used to determine in a peephole whether a sequence of load
10255 instructions can be changed into a load-multiple instruction.
10256 NOPS is the number of separate load instructions we are examining. The
10257 first NOPS entries in OPERANDS are the destination registers, the
10258 next NOPS entries are memory operands. If this function is
10259 successful, *BASE is set to the common base register of the memory
10260 accesses; *LOAD_OFFSET is set to the first memory location's offset
10261 from that base register.
10262 REGS is an array filled in with the destination register numbers.
10263 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
10264 insn numbers to an ascending order of stores. If CHECK_REGS is true,
10265 the sequence of registers in REGS matches the loads from ascending memory
10266 locations, and the function verifies that the register numbers are
10267 themselves ascending. If CHECK_REGS is false, the register numbers
10268 are stored in the order they are found in the operands. */
10269 static int
10270 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
10271 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
10272 {
10273 int unsorted_regs[MAX_LDM_STM_OPS];
10274 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10275 int order[MAX_LDM_STM_OPS];
10276 rtx base_reg_rtx = NULL;
10277 int base_reg = -1;
10278 int i, ldm_case;
10279
10280 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10281 easily extended if required. */
10282 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10283
10284 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10285
10286 /* Loop over the operands and check that the memory references are
10287 suitable (i.e. immediate offsets from the same base register). At
10288 the same time, extract the target register, and the memory
10289 offsets. */
10290 for (i = 0; i < nops; i++)
10291 {
10292 rtx reg;
10293 rtx offset;
10294
10295 /* Convert a subreg of a mem into the mem itself. */
10296 if (GET_CODE (operands[nops + i]) == SUBREG)
10297 operands[nops + i] = alter_subreg (operands + (nops + i));
10298
10299 gcc_assert (MEM_P (operands[nops + i]));
10300
10301 /* Don't reorder volatile memory references; it doesn't seem worth
10302 looking for the case where the order is ok anyway. */
10303 if (MEM_VOLATILE_P (operands[nops + i]))
10304 return 0;
10305
10306 offset = const0_rtx;
10307
10308 if ((REG_P (reg = XEXP (operands[nops + i], 0))
10309 || (GET_CODE (reg) == SUBREG
10310 && REG_P (reg = SUBREG_REG (reg))))
10311 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10312 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
10313 || (GET_CODE (reg) == SUBREG
10314 && REG_P (reg = SUBREG_REG (reg))))
10315 && (CONST_INT_P (offset
10316 = XEXP (XEXP (operands[nops + i], 0), 1)))))
10317 {
10318 if (i == 0)
10319 {
10320 base_reg = REGNO (reg);
10321 base_reg_rtx = reg;
10322 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10323 return 0;
10324 }
10325 else if (base_reg != (int) REGNO (reg))
10326 /* Not addressed from the same base register. */
10327 return 0;
10328
10329 unsorted_regs[i] = (REG_P (operands[i])
10330 ? REGNO (operands[i])
10331 : REGNO (SUBREG_REG (operands[i])));
10332
10333 /* If it isn't an integer register, or if it overwrites the
10334 base register but isn't the last insn in the list, then
10335 we can't do this. */
10336 if (unsorted_regs[i] < 0
10337 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10338 || unsorted_regs[i] > 14
10339 || (i != nops - 1 && unsorted_regs[i] == base_reg))
10340 return 0;
10341
10342 unsorted_offsets[i] = INTVAL (offset);
10343 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10344 order[0] = i;
10345 }
10346 else
10347 /* Not a suitable memory address. */
10348 return 0;
10349 }
10350
10351 /* All the useful information has now been extracted from the
10352 operands into unsorted_regs and unsorted_offsets; additionally,
10353 order[0] has been set to the lowest offset in the list. Sort
10354 the offsets into order, verifying that they are adjacent, and
10355 check that the register numbers are ascending. */
10356 if (!compute_offset_order (nops, unsorted_offsets, order,
10357 check_regs ? unsorted_regs : NULL))
10358 return 0;
10359
10360 if (saved_order)
10361 memcpy (saved_order, order, sizeof order);
10362
10363 if (base)
10364 {
10365 *base = base_reg;
10366
10367 for (i = 0; i < nops; i++)
10368 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10369
10370 *load_offset = unsorted_offsets[order[0]];
10371 }
10372
10373 if (TARGET_THUMB1
10374 && !peep2_reg_dead_p (nops, base_reg_rtx))
10375 return 0;
10376
10377 if (unsorted_offsets[order[0]] == 0)
10378 ldm_case = 1; /* ldmia */
10379 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10380 ldm_case = 2; /* ldmib */
10381 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10382 ldm_case = 3; /* ldmda */
10383 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10384 ldm_case = 4; /* ldmdb */
10385 else if (const_ok_for_arm (unsorted_offsets[order[0]])
10386 || const_ok_for_arm (-unsorted_offsets[order[0]]))
10387 ldm_case = 5;
10388 else
10389 return 0;
10390
10391 if (!multiple_operation_profitable_p (false, nops,
10392 ldm_case == 5
10393 ? unsorted_offsets[order[0]] : 0))
10394 return 0;
10395
10396 return ldm_case;
10397 }
10398
10399 /* Used to determine in a peephole whether a sequence of store instructions can
10400 be changed into a store-multiple instruction.
10401 NOPS is the number of separate store instructions we are examining.
10402 NOPS_TOTAL is the total number of instructions recognized by the peephole
10403 pattern.
10404 The first NOPS entries in OPERANDS are the source registers, the next
10405 NOPS entries are memory operands. If this function is successful, *BASE is
10406 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10407 to the first memory location's offset from that base register. REGS is an
10408 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10409 likewise filled with the corresponding rtx's.
10410 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
10411 numbers to an ascending order of stores.
10412 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10413 from ascending memory locations, and the function verifies that the register
10414 numbers are themselves ascending. If CHECK_REGS is false, the register
10415 numbers are stored in the order they are found in the operands. */
10416 static int
10417 store_multiple_sequence (rtx *operands, int nops, int nops_total,
10418 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10419 HOST_WIDE_INT *load_offset, bool check_regs)
10420 {
10421 int unsorted_regs[MAX_LDM_STM_OPS];
10422 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10423 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10424 int order[MAX_LDM_STM_OPS];
10425 int base_reg = -1;
10426 rtx base_reg_rtx = NULL;
10427 int i, stm_case;
10428
10429 /* Write back of base register is currently only supported for Thumb 1. */
10430 int base_writeback = TARGET_THUMB1;
10431
10432 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10433 easily extended if required. */
10434 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10435
10436 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10437
10438 /* Loop over the operands and check that the memory references are
10439 suitable (i.e. immediate offsets from the same base register). At
10440 the same time, extract the target register, and the memory
10441 offsets. */
10442 for (i = 0; i < nops; i++)
10443 {
10444 rtx reg;
10445 rtx offset;
10446
10447 /* Convert a subreg of a mem into the mem itself. */
10448 if (GET_CODE (operands[nops + i]) == SUBREG)
10449 operands[nops + i] = alter_subreg (operands + (nops + i));
10450
10451 gcc_assert (MEM_P (operands[nops + i]));
10452
10453 /* Don't reorder volatile memory references; it doesn't seem worth
10454 looking for the case where the order is ok anyway. */
10455 if (MEM_VOLATILE_P (operands[nops + i]))
10456 return 0;
10457
10458 offset = const0_rtx;
10459
10460 if ((REG_P (reg = XEXP (operands[nops + i], 0))
10461 || (GET_CODE (reg) == SUBREG
10462 && REG_P (reg = SUBREG_REG (reg))))
10463 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10464 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
10465 || (GET_CODE (reg) == SUBREG
10466 && REG_P (reg = SUBREG_REG (reg))))
10467 && (CONST_INT_P (offset
10468 = XEXP (XEXP (operands[nops + i], 0), 1)))))
10469 {
10470 unsorted_reg_rtxs[i] = (REG_P (operands[i])
10471 ? operands[i] : SUBREG_REG (operands[i]));
10472 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10473
10474 if (i == 0)
10475 {
10476 base_reg = REGNO (reg);
10477 base_reg_rtx = reg;
10478 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10479 return 0;
10480 }
10481 else if (base_reg != (int) REGNO (reg))
10482 /* Not addressed from the same base register. */
10483 return 0;
10484
10485 /* If it isn't an integer register, then we can't do this. */
10486 if (unsorted_regs[i] < 0
10487 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10488 /* The effects are unpredictable if the base register is
10489 both updated and stored. */
10490 || (base_writeback && unsorted_regs[i] == base_reg)
10491 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
10492 || unsorted_regs[i] > 14)
10493 return 0;
10494
10495 unsorted_offsets[i] = INTVAL (offset);
10496 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10497 order[0] = i;
10498 }
10499 else
10500 /* Not a suitable memory address. */
10501 return 0;
10502 }
10503
10504 /* All the useful information has now been extracted from the
10505 operands into unsorted_regs and unsorted_offsets; additionally,
10506 order[0] has been set to the lowest offset in the list. Sort
10507 the offsets into order, verifying that they are adjacent, and
10508 check that the register numbers are ascending. */
10509 if (!compute_offset_order (nops, unsorted_offsets, order,
10510 check_regs ? unsorted_regs : NULL))
10511 return 0;
10512
10513 if (saved_order)
10514 memcpy (saved_order, order, sizeof order);
10515
10516 if (base)
10517 {
10518 *base = base_reg;
10519
10520 for (i = 0; i < nops; i++)
10521 {
10522 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10523 if (reg_rtxs)
10524 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
10525 }
10526
10527 *load_offset = unsorted_offsets[order[0]];
10528 }
10529
10530 if (TARGET_THUMB1
10531 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
10532 return 0;
10533
10534 if (unsorted_offsets[order[0]] == 0)
10535 stm_case = 1; /* stmia */
10536 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10537 stm_case = 2; /* stmib */
10538 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10539 stm_case = 3; /* stmda */
10540 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10541 stm_case = 4; /* stmdb */
10542 else
10543 return 0;
10544
10545 if (!multiple_operation_profitable_p (false, nops, 0))
10546 return 0;
10547
10548 return stm_case;
10549 }
10550 \f
10551 /* Routines for use in generating RTL. */
10552
10553 /* Generate a load-multiple instruction. COUNT is the number of loads in
10554 the instruction; REGS and MEMS are arrays containing the operands.
10555 BASEREG is the base register to be used in addressing the memory operands.
10556 WBACK_OFFSET is nonzero if the instruction should update the base
10557 register. */
10558
10559 static rtx
10560 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10561 HOST_WIDE_INT wback_offset)
10562 {
10563 int i = 0, j;
10564 rtx result;
10565
10566 if (!multiple_operation_profitable_p (false, count, 0))
10567 {
10568 rtx seq;
10569
10570 start_sequence ();
10571
10572 for (i = 0; i < count; i++)
10573 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
10574
10575 if (wback_offset != 0)
10576 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
10577
10578 seq = get_insns ();
10579 end_sequence ();
10580
10581 return seq;
10582 }
10583
10584 result = gen_rtx_PARALLEL (VOIDmode,
10585 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10586 if (wback_offset != 0)
10587 {
10588 XVECEXP (result, 0, 0)
10589 = gen_rtx_SET (VOIDmode, basereg,
10590 plus_constant (Pmode, basereg, wback_offset));
10591 i = 1;
10592 count++;
10593 }
10594
10595 for (j = 0; i < count; i++, j++)
10596 XVECEXP (result, 0, i)
10597 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
10598
10599 return result;
10600 }
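
/* Shape of the result in the parallel case (an editor's illustration in
   informal RTL, not from the original sources): with COUNT == 2,
   REGS == {4, 5} and WBACK_OFFSET == 8, the returned rtx is roughly

     (parallel [(set basereg (plus basereg (const_int 8)))
                (set (reg:SI 4) mems[0])
                (set (reg:SI 5) mems[1])])

   i.e. the base-register update comes first, followed by one SET per
   loaded register; this is the form the load-multiple insn patterns are
   expected to match.  */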
10601
10602 /* Generate a store-multiple instruction. COUNT is the number of stores in
10603 the instruction; REGS and MEMS are arrays containing the operands.
10604 BASEREG is the base register to be used in addressing the memory operands.
10605 WBACK_OFFSET is nonzero if the instruction should update the base
10606 register. */
10607
10608 static rtx
10609 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10610 HOST_WIDE_INT wback_offset)
10611 {
10612 int i = 0, j;
10613 rtx result;
10614
10615 if (GET_CODE (basereg) == PLUS)
10616 basereg = XEXP (basereg, 0);
10617
10618 if (!multiple_operation_profitable_p (false, count, 0))
10619 {
10620 rtx seq;
10621
10622 start_sequence ();
10623
10624 for (i = 0; i < count; i++)
10625 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
10626
10627 if (wback_offset != 0)
10628 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
10629
10630 seq = get_insns ();
10631 end_sequence ();
10632
10633 return seq;
10634 }
10635
10636 result = gen_rtx_PARALLEL (VOIDmode,
10637 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10638 if (wback_offset != 0)
10639 {
10640 XVECEXP (result, 0, 0)
10641 = gen_rtx_SET (VOIDmode, basereg,
10642 plus_constant (Pmode, basereg, wback_offset));
10643 i = 1;
10644 count++;
10645 }
10646
10647 for (j = 0; i < count; i++, j++)
10648 XVECEXP (result, 0, i)
10649 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
10650
10651 return result;
10652 }
10653
10654 /* Generate either a load-multiple or a store-multiple instruction. This
10655 function can be used in situations where we can start with a single MEM
10656 rtx and adjust its address upwards.
10657 COUNT is the number of operations in the instruction, not counting a
10658 possible update of the base register. REGS is an array containing the
10659 register operands.
10660 BASEREG is the base register to be used in addressing the memory operands,
10661 which are constructed from BASEMEM.
10662 WRITE_BACK specifies whether the generated instruction should include an
10663 update of the base register.
10664 OFFSETP is used to pass an offset to and from this function; this offset
10665 is not used when constructing the address (instead BASEMEM should have an
10666 appropriate offset in its address), it is used only for setting
10667 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
10668
10669 static rtx
10670 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
10671 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10672 {
10673 rtx mems[MAX_LDM_STM_OPS];
10674 HOST_WIDE_INT offset = *offsetp;
10675 int i;
10676
10677 gcc_assert (count <= MAX_LDM_STM_OPS);
10678
10679 if (GET_CODE (basereg) == PLUS)
10680 basereg = XEXP (basereg, 0);
10681
10682 for (i = 0; i < count; i++)
10683 {
10684 rtx addr = plus_constant (Pmode, basereg, i * 4);
10685 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10686 offset += 4;
10687 }
10688
10689 if (write_back)
10690 *offsetp = offset;
10691
10692 if (is_load)
10693 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10694 write_back ? 4 * count : 0);
10695 else
10696 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10697 write_back ? 4 * count : 0);
10698 }
10699
10700 rtx
10701 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10702 rtx basemem, HOST_WIDE_INT *offsetp)
10703 {
10704 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10705 offsetp);
10706 }
10707
10708 rtx
10709 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10710 rtx basemem, HOST_WIDE_INT *offsetp)
10711 {
10712 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10713 offsetp);
10714 }
10715
10716 /* Called from a peephole2 expander to turn a sequence of loads into an
10717 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10718 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10719 is true if we can reorder the registers because they are subsequently
10720 used commutatively.
10721 Returns true iff we could generate a new instruction. */
10722
10723 bool
10724 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10725 {
10726 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10727 rtx mems[MAX_LDM_STM_OPS];
10728 int i, j, base_reg;
10729 rtx base_reg_rtx;
10730 HOST_WIDE_INT offset;
10731 int write_back = FALSE;
10732 int ldm_case;
10733 rtx addr;
10734
10735 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10736 &base_reg, &offset, !sort_regs);
10737
10738 if (ldm_case == 0)
10739 return false;
10740
10741 if (sort_regs)
10742 for (i = 0; i < nops - 1; i++)
10743 for (j = i + 1; j < nops; j++)
10744 if (regs[i] > regs[j])
10745 {
10746 int t = regs[i];
10747 regs[i] = regs[j];
10748 regs[j] = t;
10749 }
10750 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10751
10752 if (TARGET_THUMB1)
10753 {
10754 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10755 gcc_assert (ldm_case == 1 || ldm_case == 5);
10756 write_back = TRUE;
10757 }
10758
10759 if (ldm_case == 5)
10760 {
10761 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10762 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10763 offset = 0;
10764 if (!TARGET_THUMB1)
10765 {
10766 base_reg = regs[0];
10767 base_reg_rtx = newbase;
10768 }
10769 }
10770
10771 for (i = 0; i < nops; i++)
10772 {
10773 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
10774 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10775 SImode, addr, 0);
10776 }
10777 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10778 write_back ? offset + i * 4 : 0));
10779 return true;
10780 }
10781
10782 /* Called from a peephole2 expander to turn a sequence of stores into an
10783 STM instruction. OPERANDS are the operands found by the peephole matcher;
10784 NOPS indicates how many separate stores we are trying to combine.
10785 Returns true iff we could generate a new instruction. */
10786
10787 bool
10788 gen_stm_seq (rtx *operands, int nops)
10789 {
10790 int i;
10791 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10792 rtx mems[MAX_LDM_STM_OPS];
10793 int base_reg;
10794 rtx base_reg_rtx;
10795 HOST_WIDE_INT offset;
10796 int write_back = FALSE;
10797 int stm_case;
10798 rtx addr;
10799 bool base_reg_dies;
10800
10801 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10802 mem_order, &base_reg, &offset, true);
10803
10804 if (stm_case == 0)
10805 return false;
10806
10807 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10808
10809 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10810 if (TARGET_THUMB1)
10811 {
10812 gcc_assert (base_reg_dies);
10813 write_back = TRUE;
10814 }
10815
10816 if (stm_case == 5)
10817 {
10818 gcc_assert (base_reg_dies);
10819 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10820 offset = 0;
10821 }
10822
10823 addr = plus_constant (Pmode, base_reg_rtx, offset);
10824
10825 for (i = 0; i < nops; i++)
10826 {
10827 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
10828 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10829 SImode, addr, 0);
10830 }
10831 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10832 write_back ? offset + i * 4 : 0));
10833 return true;
10834 }
10835
10836 /* Called from a peephole2 expander to turn a sequence of stores that are
10837 preceded by constant loads into an STM instruction. OPERANDS are the
10838 operands found by the peephole matcher; NOPS indicates how many
10839 separate stores we are trying to combine; there are 2 * NOPS
10840 instructions in the peephole.
10841 Returns true iff we could generate a new instruction. */
10842
10843 bool
10844 gen_const_stm_seq (rtx *operands, int nops)
10845 {
10846 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10847 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10848 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10849 rtx mems[MAX_LDM_STM_OPS];
10850 int base_reg;
10851 rtx base_reg_rtx;
10852 HOST_WIDE_INT offset;
10853 int write_back = FALSE;
10854 int stm_case;
10855 rtx addr;
10856 bool base_reg_dies;
10857 int i, j;
10858 HARD_REG_SET allocated;
10859
10860 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10861 mem_order, &base_reg, &offset, false);
10862
10863 if (stm_case == 0)
10864 return false;
10865
10866 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10867
10868 /* If the same register is used more than once, try to find a free
10869 register. */
10870 CLEAR_HARD_REG_SET (allocated);
10871 for (i = 0; i < nops; i++)
10872 {
10873 for (j = i + 1; j < nops; j++)
10874 if (regs[i] == regs[j])
10875 {
10876 rtx t = peep2_find_free_register (0, nops * 2,
10877 TARGET_THUMB1 ? "l" : "r",
10878 SImode, &allocated);
10879 if (t == NULL_RTX)
10880 return false;
10881 reg_rtxs[i] = t;
10882 regs[i] = REGNO (t);
10883 }
10884 }
10885
10886 /* Compute an ordering that maps the register numbers to an ascending
10887 sequence. */
10888 reg_order[0] = 0;
10889 for (i = 0; i < nops; i++)
10890 if (regs[i] < regs[reg_order[0]])
10891 reg_order[0] = i;
10892
10893 for (i = 1; i < nops; i++)
10894 {
10895 int this_order = reg_order[i - 1];
10896 for (j = 0; j < nops; j++)
10897 if (regs[j] > regs[reg_order[i - 1]]
10898 && (this_order == reg_order[i - 1]
10899 || regs[j] < regs[this_order]))
10900 this_order = j;
10901 reg_order[i] = this_order;
10902 }
10903
10904 /* Ensure that registers that must be live after the instruction end
10905 up with the correct value. */
10906 for (i = 0; i < nops; i++)
10907 {
10908 int this_order = reg_order[i];
10909 if ((this_order != mem_order[i]
10910 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10911 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10912 return false;
10913 }
10914
10915 /* Load the constants. */
10916 for (i = 0; i < nops; i++)
10917 {
10918 rtx op = operands[2 * nops + mem_order[i]];
10919 sorted_regs[i] = regs[reg_order[i]];
10920 emit_move_insn (reg_rtxs[reg_order[i]], op);
10921 }
10922
10923 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10924
10925 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10926 if (TARGET_THUMB1)
10927 {
10928 gcc_assert (base_reg_dies);
10929 write_back = TRUE;
10930 }
10931
10932 if (stm_case == 5)
10933 {
10934 gcc_assert (base_reg_dies);
10935 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10936 offset = 0;
10937 }
10938
10939 addr = plus_constant (Pmode, base_reg_rtx, offset);
10940
10941 for (i = 0; i < nops; i++)
10942 {
10943 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
10944 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10945 SImode, addr, 0);
10946 }
10947 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10948 write_back ? offset + i * 4 : 0));
10949 return true;
10950 }
10951
10952 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
10953 unaligned copies on processors which support unaligned semantics for those
10954 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
10955 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
10956 An interleave factor of 1 (the minimum) will perform no interleaving.
10957 Load/store multiple are used for aligned addresses where possible. */
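/* Illustrative note (added): with INTERLEAVE_FACTOR == 2 and both operands
   unaligned, each iteration of the main copy loop emits

	ldr	rA, [src]
	ldr	rB, [src, #4]
	str	rA, [dst]
	str	rB, [dst, #4]

   so the second load can issue before the first store needs its data; when
   the source or destination is word-aligned the corresponding loads or
   stores are emitted as a single ldm or stm instead.  */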
10958
10959 static void
10960 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
10961 HOST_WIDE_INT length,
10962 unsigned int interleave_factor)
10963 {
10964 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
10965 int *regnos = XALLOCAVEC (int, interleave_factor);
10966 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
10967 HOST_WIDE_INT i, j;
10968 HOST_WIDE_INT remaining = length, words;
10969 rtx halfword_tmp = NULL, byte_tmp = NULL;
10970 rtx dst, src;
10971 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
10972 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
10973 HOST_WIDE_INT srcoffset, dstoffset;
10974 HOST_WIDE_INT src_autoinc, dst_autoinc;
10975 rtx mem, addr;
10976
10977 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
10978
10979 /* Use hard registers if we have aligned source or destination so we can use
10980 load/store multiple with contiguous registers. */
10981 if (dst_aligned || src_aligned)
10982 for (i = 0; i < interleave_factor; i++)
10983 regs[i] = gen_rtx_REG (SImode, i);
10984 else
10985 for (i = 0; i < interleave_factor; i++)
10986 regs[i] = gen_reg_rtx (SImode);
10987
10988 dst = copy_addr_to_reg (XEXP (dstbase, 0));
10989 src = copy_addr_to_reg (XEXP (srcbase, 0));
10990
10991 srcoffset = dstoffset = 0;
10992
10993 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
10994 For copying the last bytes we want to subtract this offset again. */
10995 src_autoinc = dst_autoinc = 0;
10996
10997 for (i = 0; i < interleave_factor; i++)
10998 regnos[i] = i;
10999
11000 /* Copy BLOCK_SIZE_BYTES chunks. */
11001
11002 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
11003 {
11004 /* Load words. */
11005 if (src_aligned && interleave_factor > 1)
11006 {
11007 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
11008 TRUE, srcbase, &srcoffset));
11009 src_autoinc += UNITS_PER_WORD * interleave_factor;
11010 }
11011 else
11012 {
11013 for (j = 0; j < interleave_factor; j++)
11014 {
11015 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
11016 - src_autoinc));
11017 mem = adjust_automodify_address (srcbase, SImode, addr,
11018 srcoffset + j * UNITS_PER_WORD);
11019 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11020 }
11021 srcoffset += block_size_bytes;
11022 }
11023
11024 /* Store words. */
11025 if (dst_aligned && interleave_factor > 1)
11026 {
11027 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
11028 TRUE, dstbase, &dstoffset));
11029 dst_autoinc += UNITS_PER_WORD * interleave_factor;
11030 }
11031 else
11032 {
11033 for (j = 0; j < interleave_factor; j++)
11034 {
11035 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
11036 - dst_autoinc));
11037 mem = adjust_automodify_address (dstbase, SImode, addr,
11038 dstoffset + j * UNITS_PER_WORD);
11039 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11040 }
11041 dstoffset += block_size_bytes;
11042 }
11043
11044 remaining -= block_size_bytes;
11045 }
11046
11047 /* Copy any whole words left (note these aren't interleaved with any
11048      subsequent halfword/byte loads/stores in the interests of simplicity).  */
11049
11050 words = remaining / UNITS_PER_WORD;
11051
11052 gcc_assert (words < interleave_factor);
11053
11054 if (src_aligned && words > 1)
11055 {
11056 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
11057 &srcoffset));
11058 src_autoinc += UNITS_PER_WORD * words;
11059 }
11060 else
11061 {
11062 for (j = 0; j < words; j++)
11063 {
11064 addr = plus_constant (Pmode, src,
11065 srcoffset + j * UNITS_PER_WORD - src_autoinc);
11066 mem = adjust_automodify_address (srcbase, SImode, addr,
11067 srcoffset + j * UNITS_PER_WORD);
11068 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11069 }
11070 srcoffset += words * UNITS_PER_WORD;
11071 }
11072
11073 if (dst_aligned && words > 1)
11074 {
11075 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
11076 &dstoffset));
11077 dst_autoinc += words * UNITS_PER_WORD;
11078 }
11079 else
11080 {
11081 for (j = 0; j < words; j++)
11082 {
11083 addr = plus_constant (Pmode, dst,
11084 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
11085 mem = adjust_automodify_address (dstbase, SImode, addr,
11086 dstoffset + j * UNITS_PER_WORD);
11087 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11088 }
11089 dstoffset += words * UNITS_PER_WORD;
11090 }
11091
11092 remaining -= words * UNITS_PER_WORD;
11093
11094 gcc_assert (remaining < 4);
11095
11096 /* Copy a halfword if necessary. */
11097
11098 if (remaining >= 2)
11099 {
11100 halfword_tmp = gen_reg_rtx (SImode);
11101
11102 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11103 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
11104 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
11105
11106 /* Either write out immediately, or delay until we've loaded the last
11107 byte, depending on interleave factor. */
11108 if (interleave_factor == 1)
11109 {
11110 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11111 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11112 emit_insn (gen_unaligned_storehi (mem,
11113 gen_lowpart (HImode, halfword_tmp)));
11114 halfword_tmp = NULL;
11115 dstoffset += 2;
11116 }
11117
11118 remaining -= 2;
11119 srcoffset += 2;
11120 }
11121
11122 gcc_assert (remaining < 2);
11123
11124 /* Copy last byte. */
11125
11126 if ((remaining & 1) != 0)
11127 {
11128 byte_tmp = gen_reg_rtx (SImode);
11129
11130 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11131 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
11132 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
11133
11134 if (interleave_factor == 1)
11135 {
11136 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11137 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11138 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11139 byte_tmp = NULL;
11140 dstoffset++;
11141 }
11142
11143 remaining--;
11144 srcoffset++;
11145 }
11146
11147 /* Store last halfword if we haven't done so already. */
11148
11149 if (halfword_tmp)
11150 {
11151 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11152 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11153 emit_insn (gen_unaligned_storehi (mem,
11154 gen_lowpart (HImode, halfword_tmp)));
11155 dstoffset += 2;
11156 }
11157
11158 /* Likewise for last byte. */
11159
11160 if (byte_tmp)
11161 {
11162 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11163 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11164 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11165 dstoffset++;
11166 }
11167
11168 gcc_assert (remaining == 0 && srcoffset == dstoffset);
11169 }
11170
11171 /* From mips_adjust_block_mem:
11172
11173 Helper function for doing a loop-based block operation on memory
11174 reference MEM. Each iteration of the loop will operate on LENGTH
11175 bytes of MEM.
11176
11177 Create a new base register for use within the loop and point it to
11178 the start of MEM. Create a new memory reference that uses this
11179 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
11180
11181 static void
11182 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
11183 rtx *loop_mem)
11184 {
11185 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
11186
11187 /* Although the new mem does not refer to a known location,
11188 it does keep up to LENGTH bytes of alignment. */
11189 *loop_mem = change_address (mem, BLKmode, *loop_reg);
11190 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
11191 }
11192
11193 /* From mips_block_move_loop:
11194
11195 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
11196 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
11197 the memory regions do not overlap. */
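/* Rough shape of the generated code (illustrative only):

	final_src = src + (LENGTH - leftover)
     L:	copy BYTES_PER_ITER bytes from src to dest   (straight-line copy)
	src += BYTES_PER_ITER; dest += BYTES_PER_ITER
	if (src != final_src) goto L
	copy the leftover tail, if any		      (straight-line copy)  */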
11198
11199 static void
11200 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
11201 unsigned int interleave_factor,
11202 HOST_WIDE_INT bytes_per_iter)
11203 {
11204 rtx label, src_reg, dest_reg, final_src, test;
11205 HOST_WIDE_INT leftover;
11206
11207 leftover = length % bytes_per_iter;
11208 length -= leftover;
11209
11210 /* Create registers and memory references for use within the loop. */
11211 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
11212 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
11213
11214 /* Calculate the value that SRC_REG should have after the last iteration of
11215 the loop. */
11216 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
11217 0, 0, OPTAB_WIDEN);
11218
11219 /* Emit the start of the loop. */
11220 label = gen_label_rtx ();
11221 emit_label (label);
11222
11223 /* Emit the loop body. */
11224 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
11225 interleave_factor);
11226
11227 /* Move on to the next block. */
11228 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
11229 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
11230
11231 /* Emit the loop condition. */
11232 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
11233 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
11234
11235 /* Mop up any left-over bytes. */
11236 if (leftover)
11237 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
11238 }
11239
11240 /* Emit a block move when either the source or destination is unaligned (not
11241 aligned to a four-byte boundary). This may need further tuning depending on
11242 core type, optimize_size setting, etc. */
11243
11244 static int
11245 arm_movmemqi_unaligned (rtx *operands)
11246 {
11247 HOST_WIDE_INT length = INTVAL (operands[2]);
11248
11249 if (optimize_size)
11250 {
11251 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
11252 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
11253 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
11254 size of code if optimizing for size. We'll use ldm/stm if src_aligned
11255 or dst_aligned though: allow more interleaving in those cases since the
11256 resulting code can be smaller. */
11257 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
11258 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
11259
11260 if (length > 12)
11261 arm_block_move_unaligned_loop (operands[0], operands[1], length,
11262 interleave_factor, bytes_per_iter);
11263 else
11264 arm_block_move_unaligned_straight (operands[0], operands[1], length,
11265 interleave_factor);
11266 }
11267 else
11268 {
11269 /* Note that the loop created by arm_block_move_unaligned_loop may be
11270 subject to loop unrolling, which makes tuning this condition a little
11271 redundant. */
11272 if (length > 32)
11273 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
11274 else
11275 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
11276 }
11277
11278 return 1;
11279 }
11280
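/* Expand a movmemqi pattern for a small constant-length copy.  (Comment
   added; summary of the code below.)  OPERANDS[0] and OPERANDS[1] are the
   destination and source blocks, OPERANDS[2] the byte count and OPERANDS[3]
   the alignment.  Copies longer than 64 bytes, or with a non-constant length
   or alignment, are rejected; word-unaligned copies are rejected too unless
   unaligned accesses are available, in which case they are expanded by
   arm_movmemqi_unaligned.  Returns nonzero on success.  */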
11281 int
11282 arm_gen_movmemqi (rtx *operands)
11283 {
11284 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
11285 HOST_WIDE_INT srcoffset, dstoffset;
11286 int i;
11287 rtx src, dst, srcbase, dstbase;
11288 rtx part_bytes_reg = NULL;
11289 rtx mem;
11290
11291 if (!CONST_INT_P (operands[2])
11292 || !CONST_INT_P (operands[3])
11293 || INTVAL (operands[2]) > 64)
11294 return 0;
11295
11296 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
11297 return arm_movmemqi_unaligned (operands);
11298
11299 if (INTVAL (operands[3]) & 3)
11300 return 0;
11301
11302 dstbase = operands[0];
11303 srcbase = operands[1];
11304
11305 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
11306 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
11307
11308 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
11309 out_words_to_go = INTVAL (operands[2]) / 4;
11310 last_bytes = INTVAL (operands[2]) & 3;
11311 dstoffset = srcoffset = 0;
11312
11313 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
11314 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
11315
11316 for (i = 0; in_words_to_go >= 2; i+=4)
11317 {
11318 if (in_words_to_go > 4)
11319 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
11320 TRUE, srcbase, &srcoffset));
11321 else
11322 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
11323 src, FALSE, srcbase,
11324 &srcoffset));
11325
11326 if (out_words_to_go)
11327 {
11328 if (out_words_to_go > 4)
11329 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
11330 TRUE, dstbase, &dstoffset));
11331 else if (out_words_to_go != 1)
11332 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
11333 out_words_to_go, dst,
11334 (last_bytes == 0
11335 ? FALSE : TRUE),
11336 dstbase, &dstoffset));
11337 else
11338 {
11339 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11340 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
11341 if (last_bytes != 0)
11342 {
11343 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
11344 dstoffset += 4;
11345 }
11346 }
11347 }
11348
11349 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
11350 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
11351 }
11352
11353 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
11354 if (out_words_to_go)
11355 {
11356 rtx sreg;
11357
11358 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11359 sreg = copy_to_reg (mem);
11360
11361 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11362 emit_move_insn (mem, sreg);
11363 in_words_to_go--;
11364
11365 gcc_assert (!in_words_to_go); /* Sanity check */
11366 }
11367
11368 if (in_words_to_go)
11369 {
11370 gcc_assert (in_words_to_go > 0);
11371
11372 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11373 part_bytes_reg = copy_to_mode_reg (SImode, mem);
11374 }
11375
11376 gcc_assert (!last_bytes || part_bytes_reg);
11377
11378 if (BYTES_BIG_ENDIAN && last_bytes)
11379 {
11380 rtx tmp = gen_reg_rtx (SImode);
11381
11382 /* The bytes we want are in the top end of the word. */
11383 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
11384 GEN_INT (8 * (4 - last_bytes))));
11385 part_bytes_reg = tmp;
11386
11387 while (last_bytes)
11388 {
11389 mem = adjust_automodify_address (dstbase, QImode,
11390 plus_constant (Pmode, dst,
11391 last_bytes - 1),
11392 dstoffset + last_bytes - 1);
11393 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11394
11395 if (--last_bytes)
11396 {
11397 tmp = gen_reg_rtx (SImode);
11398 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
11399 part_bytes_reg = tmp;
11400 }
11401 }
11402
11403 }
11404 else
11405 {
11406 if (last_bytes > 1)
11407 {
11408 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
11409 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
11410 last_bytes -= 2;
11411 if (last_bytes)
11412 {
11413 rtx tmp = gen_reg_rtx (SImode);
11414 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
11415 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
11416 part_bytes_reg = tmp;
11417 dstoffset += 2;
11418 }
11419 }
11420
11421 if (last_bytes)
11422 {
11423 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
11424 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11425 }
11426 }
11427
11428 return 1;
11429 }
11430
11431 /* Select a dominance comparison mode if possible for a test of the general
11432 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
11433 COND_OR == DOM_CC_X_AND_Y => (X && Y)
11434 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
11435 COND_OR == DOM_CC_X_OR_Y => (X || Y)
11436 In all cases OP will be either EQ or NE, but we don't need to know which
11437 here. If we are unable to support a dominance comparison we return
11438    CCmode.  This will then fail to match for the RTL expressions that
11439 generate this call. */
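/* Worked example (added for illustration): for a test of the form
   (ne (ior (eq x 0) (le y 0)) (const_int 0)) the component conditions are
   EQ and LE; EQ dominates LE, so the pair can be evaluated with a single
   conditional-compare sequence and CC_DLEmode is returned.  */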
11440 enum machine_mode
11441 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
11442 {
11443 enum rtx_code cond1, cond2;
11444 int swapped = 0;
11445
11446 /* Currently we will probably get the wrong result if the individual
11447 comparisons are not simple. This also ensures that it is safe to
11448 reverse a comparison if necessary. */
11449 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
11450 != CCmode)
11451 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
11452 != CCmode))
11453 return CCmode;
11454
11455 /* The if_then_else variant of this tests the second condition if the
11456 first passes, but is true if the first fails. Reverse the first
11457 condition to get a true "inclusive-or" expression. */
11458 if (cond_or == DOM_CC_NX_OR_Y)
11459 cond1 = reverse_condition (cond1);
11460
11461 /* If the comparisons are not equal, and one doesn't dominate the other,
11462 then we can't do this. */
11463 if (cond1 != cond2
11464 && !comparison_dominates_p (cond1, cond2)
11465 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
11466 return CCmode;
11467
11468 if (swapped)
11469 {
11470 enum rtx_code temp = cond1;
11471 cond1 = cond2;
11472 cond2 = temp;
11473 }
11474
11475 switch (cond1)
11476 {
11477 case EQ:
11478 if (cond_or == DOM_CC_X_AND_Y)
11479 return CC_DEQmode;
11480
11481 switch (cond2)
11482 {
11483 case EQ: return CC_DEQmode;
11484 case LE: return CC_DLEmode;
11485 case LEU: return CC_DLEUmode;
11486 case GE: return CC_DGEmode;
11487 case GEU: return CC_DGEUmode;
11488 default: gcc_unreachable ();
11489 }
11490
11491 case LT:
11492 if (cond_or == DOM_CC_X_AND_Y)
11493 return CC_DLTmode;
11494
11495 switch (cond2)
11496 {
11497 case LT:
11498 return CC_DLTmode;
11499 case LE:
11500 return CC_DLEmode;
11501 case NE:
11502 return CC_DNEmode;
11503 default:
11504 gcc_unreachable ();
11505 }
11506
11507 case GT:
11508 if (cond_or == DOM_CC_X_AND_Y)
11509 return CC_DGTmode;
11510
11511 switch (cond2)
11512 {
11513 case GT:
11514 return CC_DGTmode;
11515 case GE:
11516 return CC_DGEmode;
11517 case NE:
11518 return CC_DNEmode;
11519 default:
11520 gcc_unreachable ();
11521 }
11522
11523 case LTU:
11524 if (cond_or == DOM_CC_X_AND_Y)
11525 return CC_DLTUmode;
11526
11527 switch (cond2)
11528 {
11529 case LTU:
11530 return CC_DLTUmode;
11531 case LEU:
11532 return CC_DLEUmode;
11533 case NE:
11534 return CC_DNEmode;
11535 default:
11536 gcc_unreachable ();
11537 }
11538
11539 case GTU:
11540 if (cond_or == DOM_CC_X_AND_Y)
11541 return CC_DGTUmode;
11542
11543 switch (cond2)
11544 {
11545 case GTU:
11546 return CC_DGTUmode;
11547 case GEU:
11548 return CC_DGEUmode;
11549 case NE:
11550 return CC_DNEmode;
11551 default:
11552 gcc_unreachable ();
11553 }
11554
11555 /* The remaining cases only occur when both comparisons are the
11556 same. */
11557 case NE:
11558 gcc_assert (cond1 == cond2);
11559 return CC_DNEmode;
11560
11561 case LE:
11562 gcc_assert (cond1 == cond2);
11563 return CC_DLEmode;
11564
11565 case GE:
11566 gcc_assert (cond1 == cond2);
11567 return CC_DGEmode;
11568
11569 case LEU:
11570 gcc_assert (cond1 == cond2);
11571 return CC_DLEUmode;
11572
11573 case GEU:
11574 gcc_assert (cond1 == cond2);
11575 return CC_DGEUmode;
11576
11577 default:
11578 gcc_unreachable ();
11579 }
11580 }
11581
11582 enum machine_mode
11583 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
11584 {
11585 /* All floating point compares return CCFP if it is an equality
11586 comparison, and CCFPE otherwise. */
11587 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11588 {
11589 switch (op)
11590 {
11591 case EQ:
11592 case NE:
11593 case UNORDERED:
11594 case ORDERED:
11595 case UNLT:
11596 case UNLE:
11597 case UNGT:
11598 case UNGE:
11599 case UNEQ:
11600 case LTGT:
11601 return CCFPmode;
11602
11603 case LT:
11604 case LE:
11605 case GT:
11606 case GE:
11607 return CCFPEmode;
11608
11609 default:
11610 gcc_unreachable ();
11611 }
11612 }
11613
11614 /* A compare with a shifted operand. Because of canonicalization, the
11615 comparison will have to be swapped when we emit the assembler. */
11616 if (GET_MODE (y) == SImode
11617 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11618 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11619 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
11620 || GET_CODE (x) == ROTATERT))
11621 return CC_SWPmode;
11622
11623 /* This operation is performed swapped, but since we only rely on the Z
11624 flag we don't need an additional mode. */
11625 if (GET_MODE (y) == SImode
11626 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11627 && GET_CODE (x) == NEG
11628 && (op == EQ || op == NE))
11629 return CC_Zmode;
11630
11631 /* This is a special case that is used by combine to allow a
11632 comparison of a shifted byte load to be split into a zero-extend
11633 followed by a comparison of the shifted integer (only valid for
11634 equalities and unsigned inequalities). */
11635 if (GET_MODE (x) == SImode
11636 && GET_CODE (x) == ASHIFT
11637 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
11638 && GET_CODE (XEXP (x, 0)) == SUBREG
11639 && MEM_P (SUBREG_REG (XEXP (x, 0)))
11640 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
11641 && (op == EQ || op == NE
11642 || op == GEU || op == GTU || op == LTU || op == LEU)
11643 && CONST_INT_P (y))
11644 return CC_Zmode;
11645
11646   /* A construct for a conditional compare: if the false arm contains
11647      0, then both conditions must be true; otherwise either condition
11648 must be true. Not all conditions are possible, so CCmode is
11649 returned if it can't be done. */
11650 if (GET_CODE (x) == IF_THEN_ELSE
11651 && (XEXP (x, 2) == const0_rtx
11652 || XEXP (x, 2) == const1_rtx)
11653 && COMPARISON_P (XEXP (x, 0))
11654 && COMPARISON_P (XEXP (x, 1)))
11655 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11656 INTVAL (XEXP (x, 2)));
11657
11658 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
11659 if (GET_CODE (x) == AND
11660 && (op == EQ || op == NE)
11661 && COMPARISON_P (XEXP (x, 0))
11662 && COMPARISON_P (XEXP (x, 1)))
11663 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11664 DOM_CC_X_AND_Y);
11665
11666 if (GET_CODE (x) == IOR
11667 && (op == EQ || op == NE)
11668 && COMPARISON_P (XEXP (x, 0))
11669 && COMPARISON_P (XEXP (x, 1)))
11670 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11671 DOM_CC_X_OR_Y);
11672
11673 /* An operation (on Thumb) where we want to test for a single bit.
11674 This is done by shifting that bit up into the top bit of a
11675 scratch register; we can then branch on the sign bit. */
11676 if (TARGET_THUMB1
11677 && GET_MODE (x) == SImode
11678 && (op == EQ || op == NE)
11679 && GET_CODE (x) == ZERO_EXTRACT
11680 && XEXP (x, 1) == const1_rtx)
11681 return CC_Nmode;
11682
11683   /* An operation that sets the condition codes as a side-effect does not
11684      set the V flag correctly, so we can only use comparisons where
11685 this doesn't matter. (For LT and GE we can use "mi" and "pl"
11686 instead.) */
11687 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
11688 if (GET_MODE (x) == SImode
11689 && y == const0_rtx
11690 && (op == EQ || op == NE || op == LT || op == GE)
11691 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
11692 || GET_CODE (x) == AND || GET_CODE (x) == IOR
11693 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
11694 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
11695 || GET_CODE (x) == LSHIFTRT
11696 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11697 || GET_CODE (x) == ROTATERT
11698 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
11699 return CC_NOOVmode;
11700
11701 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
11702 return CC_Zmode;
11703
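  /* Note (added): an unsigned comparison of a sum against one of its
     operands tests the carry out of the addition; (a + b) compared LTU
     against b is true exactly when the addition wrapped around.  */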
11704 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
11705 && GET_CODE (x) == PLUS
11706 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
11707 return CC_Cmode;
11708
11709 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
11710 {
11711 switch (op)
11712 {
11713 case EQ:
11714 case NE:
11715 /* A DImode comparison against zero can be implemented by
11716 or'ing the two halves together. */
11717 if (y == const0_rtx)
11718 return CC_Zmode;
11719
11720 /* We can do an equality test in three Thumb instructions. */
11721 if (!TARGET_32BIT)
11722 return CC_Zmode;
11723
11724 /* FALLTHROUGH */
11725
11726 case LTU:
11727 case LEU:
11728 case GTU:
11729 case GEU:
11730 /* DImode unsigned comparisons can be implemented by cmp +
11731 cmpeq without a scratch register. Not worth doing in
11732 Thumb-2. */
11733 if (TARGET_32BIT)
11734 return CC_CZmode;
11735
11736 /* FALLTHROUGH */
11737
11738 case LT:
11739 case LE:
11740 case GT:
11741 case GE:
11742 /* DImode signed and unsigned comparisons can be implemented
11743 by cmp + sbcs with a scratch register, but that does not
11744 set the Z flag - we must reverse GT/LE/GTU/LEU. */
11745 gcc_assert (op != EQ && op != NE);
11746 return CC_NCVmode;
11747
11748 default:
11749 gcc_unreachable ();
11750 }
11751 }
11752
11753 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
11754 return GET_MODE (x);
11755
11756 return CCmode;
11757 }
11758
11759 /* X and Y are two things to compare using CODE. Emit the compare insn and
11760    return the rtx for the CC register in the proper mode.  SCRATCH, if
11761    non-null, is a spare SImode register for DImode comparisons after reload.  */
11762 rtx
11763 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
11764 {
11765 enum machine_mode mode;
11766 rtx cc_reg;
11767 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
11768
11769 /* We might have X as a constant, Y as a register because of the predicates
11770 used for cmpdi. If so, force X to a register here. */
11771 if (dimode_comparison && !REG_P (x))
11772 x = force_reg (DImode, x);
11773
11774 mode = SELECT_CC_MODE (code, x, y);
11775 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
11776
11777 if (dimode_comparison
11778 && mode != CC_CZmode)
11779 {
11780 rtx clobber, set;
11781
11782 /* To compare two non-zero values for equality, XOR them and
11783 then compare against zero. Not used for ARM mode; there
11784 CC_CZmode is cheaper. */
11785 if (mode == CC_Zmode && y != const0_rtx)
11786 {
11787 gcc_assert (!reload_completed);
11788 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
11789 y = const0_rtx;
11790 }
11791
11792 /* A scratch register is required. */
11793 if (reload_completed)
11794 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
11795 else
11796 scratch = gen_rtx_SCRATCH (SImode);
11797
11798 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
11799 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
11800 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11801 }
11802 else
11803 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
11804
11805 return cc_reg;
11806 }
11807
11808 /* Generate a sequence of insns that will generate the correct return
11809 address mask depending on the physical architecture that the program
11810 is running on. */
11811 rtx
11812 arm_gen_return_addr_mask (void)
11813 {
11814 rtx reg = gen_reg_rtx (Pmode);
11815
11816 emit_insn (gen_return_addr_mask (reg));
11817 return reg;
11818 }
11819
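/* Handle loading a half-word from memory during reload by synthesizing it as
   two byte loads that are merged with a shift and an OR.  (Comment added;
   summary of the code below.)  OPERANDS[1] is the memory reference, possibly
   a spilled pseudo or a SUBREG of one; OPERANDS[0] is the destination and
   OPERANDS[2] a DImode scratch, of which whichever half does not overlap
   OPERANDS[0] holds the intermediate byte.  */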
11820 void
11821 arm_reload_in_hi (rtx *operands)
11822 {
11823 rtx ref = operands[1];
11824 rtx base, scratch;
11825 HOST_WIDE_INT offset = 0;
11826
11827 if (GET_CODE (ref) == SUBREG)
11828 {
11829 offset = SUBREG_BYTE (ref);
11830 ref = SUBREG_REG (ref);
11831 }
11832
11833 if (REG_P (ref))
11834 {
11835 /* We have a pseudo which has been spilt onto the stack; there
11836 are two cases here: the first where there is a simple
11837 stack-slot replacement and a second where the stack-slot is
11838 out of range, or is used as a subreg. */
11839 if (reg_equiv_mem (REGNO (ref)))
11840 {
11841 ref = reg_equiv_mem (REGNO (ref));
11842 base = find_replacement (&XEXP (ref, 0));
11843 }
11844 else
11845 /* The slot is out of range, or was dressed up in a SUBREG. */
11846 base = reg_equiv_address (REGNO (ref));
11847 }
11848 else
11849 base = find_replacement (&XEXP (ref, 0));
11850
11851 /* Handle the case where the address is too complex to be offset by 1. */
11852 if (GET_CODE (base) == MINUS
11853 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
11854 {
11855 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11856
11857 emit_set_insn (base_plus, base);
11858 base = base_plus;
11859 }
11860 else if (GET_CODE (base) == PLUS)
11861 {
11862 /* The addend must be CONST_INT, or we would have dealt with it above. */
11863 HOST_WIDE_INT hi, lo;
11864
11865 offset += INTVAL (XEXP (base, 1));
11866 base = XEXP (base, 0);
11867
11868 /* Rework the address into a legal sequence of insns. */
11869 /* Valid range for lo is -4095 -> 4095 */
11870 lo = (offset >= 0
11871 ? (offset & 0xfff)
11872 : -((-offset) & 0xfff));
11873
11874       /* Corner case: if lo is the max offset then we would be out of range
11875 once we have added the additional 1 below, so bump the msb into the
11876 pre-loading insn(s). */
11877 if (lo == 4095)
11878 lo &= 0x7ff;
11879
11880 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11881 ^ (HOST_WIDE_INT) 0x80000000)
11882 - (HOST_WIDE_INT) 0x80000000);
11883
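      /* Worked example (added): offset == 0x1005 gives lo == 5 and
	 hi == 0x1000; offset == -0x1005 gives lo == -5 and hi == -0x1000.
	 In each case LO fits the load's immediate field and HI is left for
	 addsi3 to synthesize.  */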
11884 gcc_assert (hi + lo == offset);
11885
11886 if (hi != 0)
11887 {
11888 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11889
11890 /* Get the base address; addsi3 knows how to handle constants
11891 that require more than one insn. */
11892 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11893 base = base_plus;
11894 offset = lo;
11895 }
11896 }
11897
11898 /* Operands[2] may overlap operands[0] (though it won't overlap
11899      operands[1]); that's why we asked for a DImode reg -- so we can
11900      use the half that does not overlap.  */
11901 if (REGNO (operands[2]) == REGNO (operands[0]))
11902 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11903 else
11904 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11905
11906 emit_insn (gen_zero_extendqisi2 (scratch,
11907 gen_rtx_MEM (QImode,
11908 plus_constant (Pmode, base,
11909 offset))));
11910 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
11911 gen_rtx_MEM (QImode,
11912 plus_constant (Pmode, base,
11913 offset + 1))));
11914 if (!BYTES_BIG_ENDIAN)
11915 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11916 gen_rtx_IOR (SImode,
11917 gen_rtx_ASHIFT
11918 (SImode,
11919 gen_rtx_SUBREG (SImode, operands[0], 0),
11920 GEN_INT (8)),
11921 scratch));
11922 else
11923 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11924 gen_rtx_IOR (SImode,
11925 gen_rtx_ASHIFT (SImode, scratch,
11926 GEN_INT (8)),
11927 gen_rtx_SUBREG (SImode, operands[0], 0)));
11928 }
11929
11930 /* Handle storing a half-word to memory during reload by synthesizing it as two
11931 byte stores. Take care not to clobber the input values until after we
11932 have moved them somewhere safe. This code assumes that if the DImode
11933 scratch in operands[2] overlaps either the input value or output address
11934 in some way, then that value must die in this insn (we absolutely need
11935 two scratch registers for some corner cases). */
11936 void
11937 arm_reload_out_hi (rtx *operands)
11938 {
11939 rtx ref = operands[0];
11940 rtx outval = operands[1];
11941 rtx base, scratch;
11942 HOST_WIDE_INT offset = 0;
11943
11944 if (GET_CODE (ref) == SUBREG)
11945 {
11946 offset = SUBREG_BYTE (ref);
11947 ref = SUBREG_REG (ref);
11948 }
11949
11950 if (REG_P (ref))
11951 {
11952 /* We have a pseudo which has been spilt onto the stack; there
11953 are two cases here: the first where there is a simple
11954 stack-slot replacement and a second where the stack-slot is
11955 out of range, or is used as a subreg. */
11956 if (reg_equiv_mem (REGNO (ref)))
11957 {
11958 ref = reg_equiv_mem (REGNO (ref));
11959 base = find_replacement (&XEXP (ref, 0));
11960 }
11961 else
11962 /* The slot is out of range, or was dressed up in a SUBREG. */
11963 base = reg_equiv_address (REGNO (ref));
11964 }
11965 else
11966 base = find_replacement (&XEXP (ref, 0));
11967
11968 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11969
11970 /* Handle the case where the address is too complex to be offset by 1. */
11971 if (GET_CODE (base) == MINUS
11972 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
11973 {
11974 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11975
11976 /* Be careful not to destroy OUTVAL. */
11977 if (reg_overlap_mentioned_p (base_plus, outval))
11978 {
11979 /* Updating base_plus might destroy outval, see if we can
11980 swap the scratch and base_plus. */
11981 if (!reg_overlap_mentioned_p (scratch, outval))
11982 {
11983 rtx tmp = scratch;
11984 scratch = base_plus;
11985 base_plus = tmp;
11986 }
11987 else
11988 {
11989 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11990
11991 /* Be conservative and copy OUTVAL into the scratch now,
11992 this should only be necessary if outval is a subreg
11993 of something larger than a word. */
11994 /* XXX Might this clobber base? I can't see how it can,
11995 since scratch is known to overlap with OUTVAL, and
11996 must be wider than a word. */
11997 emit_insn (gen_movhi (scratch_hi, outval));
11998 outval = scratch_hi;
11999 }
12000 }
12001
12002 emit_set_insn (base_plus, base);
12003 base = base_plus;
12004 }
12005 else if (GET_CODE (base) == PLUS)
12006 {
12007 /* The addend must be CONST_INT, or we would have dealt with it above. */
12008 HOST_WIDE_INT hi, lo;
12009
12010 offset += INTVAL (XEXP (base, 1));
12011 base = XEXP (base, 0);
12012
12013 /* Rework the address into a legal sequence of insns. */
12014 /* Valid range for lo is -4095 -> 4095 */
12015 lo = (offset >= 0
12016 ? (offset & 0xfff)
12017 : -((-offset) & 0xfff));
12018
12019 	  /* Corner case: if lo is the max offset then we would be out of range
12020 once we have added the additional 1 below, so bump the msb into the
12021 pre-loading insn(s). */
12022 if (lo == 4095)
12023 lo &= 0x7ff;
12024
12025 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
12026 ^ (HOST_WIDE_INT) 0x80000000)
12027 - (HOST_WIDE_INT) 0x80000000);
12028
12029 gcc_assert (hi + lo == offset);
12030
12031 if (hi != 0)
12032 {
12033 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12034
12035 /* Be careful not to destroy OUTVAL. */
12036 if (reg_overlap_mentioned_p (base_plus, outval))
12037 {
12038 /* Updating base_plus might destroy outval, see if we
12039 can swap the scratch and base_plus. */
12040 if (!reg_overlap_mentioned_p (scratch, outval))
12041 {
12042 rtx tmp = scratch;
12043 scratch = base_plus;
12044 base_plus = tmp;
12045 }
12046 else
12047 {
12048 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12049
12050 /* Be conservative and copy outval into scratch now,
12051 this should only be necessary if outval is a
12052 subreg of something larger than a word. */
12053 /* XXX Might this clobber base? I can't see how it
12054 can, since scratch is known to overlap with
12055 outval. */
12056 emit_insn (gen_movhi (scratch_hi, outval));
12057 outval = scratch_hi;
12058 }
12059 }
12060
12061 /* Get the base address; addsi3 knows how to handle constants
12062 that require more than one insn. */
12063 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
12064 base = base_plus;
12065 offset = lo;
12066 }
12067 }
12068
12069 if (BYTES_BIG_ENDIAN)
12070 {
12071 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12072 plus_constant (Pmode, base,
12073 offset + 1)),
12074 gen_lowpart (QImode, outval)));
12075 emit_insn (gen_lshrsi3 (scratch,
12076 gen_rtx_SUBREG (SImode, outval, 0),
12077 GEN_INT (8)));
12078 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12079 offset)),
12080 gen_lowpart (QImode, scratch)));
12081 }
12082 else
12083 {
12084 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12085 offset)),
12086 gen_lowpart (QImode, outval)));
12087 emit_insn (gen_lshrsi3 (scratch,
12088 gen_rtx_SUBREG (SImode, outval, 0),
12089 GEN_INT (8)));
12090 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12091 plus_constant (Pmode, base,
12092 offset + 1)),
12093 gen_lowpart (QImode, scratch)));
12094 }
12095 }
12096
12097 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
12098 (padded to the size of a word) should be passed in a register. */
12099
12100 static bool
12101 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
12102 {
12103 if (TARGET_AAPCS_BASED)
12104 return must_pass_in_stack_var_size (mode, type);
12105 else
12106 return must_pass_in_stack_var_size_or_pad (mode, type);
12107 }
12108
12109
12110 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
12111 Return true if an argument passed on the stack should be padded upwards,
12112 i.e. if the least-significant byte has useful data.
12113 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
12114 aggregate types are placed in the lowest memory address. */
12115
12116 bool
12117 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
12118 {
12119 if (!TARGET_AAPCS_BASED)
12120 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
12121
12122 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
12123 return false;
12124
12125 return true;
12126 }
12127
12128
12129 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
12130 Return !BYTES_BIG_ENDIAN if the least significant byte of the
12131 register has useful data, and return the opposite if the most
12132 significant byte does. */
12133
12134 bool
12135 arm_pad_reg_upward (enum machine_mode mode,
12136 tree type, int first ATTRIBUTE_UNUSED)
12137 {
12138 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
12139 {
12140 /* For AAPCS, small aggregates, small fixed-point types,
12141 and small complex types are always padded upwards. */
12142 if (type)
12143 {
12144 if ((AGGREGATE_TYPE_P (type)
12145 || TREE_CODE (type) == COMPLEX_TYPE
12146 || FIXED_POINT_TYPE_P (type))
12147 && int_size_in_bytes (type) <= 4)
12148 return true;
12149 }
12150 else
12151 {
12152 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
12153 && GET_MODE_SIZE (mode) <= 4)
12154 return true;
12155 }
12156 }
12157
12158 /* Otherwise, use default padding. */
12159 return !BYTES_BIG_ENDIAN;
12160 }
12161
12162 \f
12163 /* Print a symbolic form of X to the debug file, F. */
12164 static void
12165 arm_print_value (FILE *f, rtx x)
12166 {
12167 switch (GET_CODE (x))
12168 {
12169 case CONST_INT:
12170 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
12171 return;
12172
12173 case CONST_DOUBLE:
12174 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
12175 return;
12176
12177 case CONST_VECTOR:
12178 {
12179 int i;
12180
12181 fprintf (f, "<");
12182 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
12183 {
12184 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
12185 if (i < (CONST_VECTOR_NUNITS (x) - 1))
12186 fputc (',', f);
12187 }
12188 fprintf (f, ">");
12189 }
12190 return;
12191
12192 case CONST_STRING:
12193 fprintf (f, "\"%s\"", XSTR (x, 0));
12194 return;
12195
12196 case SYMBOL_REF:
12197 fprintf (f, "`%s'", XSTR (x, 0));
12198 return;
12199
12200 case LABEL_REF:
12201 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
12202 return;
12203
12204 case CONST:
12205 arm_print_value (f, XEXP (x, 0));
12206 return;
12207
12208 case PLUS:
12209 arm_print_value (f, XEXP (x, 0));
12210 fprintf (f, "+");
12211 arm_print_value (f, XEXP (x, 1));
12212 return;
12213
12214 case PC:
12215 fprintf (f, "pc");
12216 return;
12217
12218 default:
12219 fprintf (f, "????");
12220 return;
12221 }
12222 }
12223 \f
12224 /* Routines for manipulation of the constant pool. */
12225
12226 /* Arm instructions cannot load a large constant directly into a
12227 register; they have to come from a pc relative load. The constant
12228 must therefore be placed in the addressable range of the pc
12229 relative load. Depending on the precise pc relative load
12230 instruction the range is somewhere between 256 bytes and 4k. This
12231 means that we often have to dump a constant inside a function, and
12232 generate code to branch around it.
12233
12234 It is important to minimize this, since the branches will slow
12235 things down and make the code larger.
12236
12237 Normally we can hide the table after an existing unconditional
12238 branch so that there is no interruption of the flow, but in the
12239 worst case the code looks like this:
12240
12241 ldr rn, L1
12242 ...
12243 b L2
12244 align
12245 L1: .long value
12246 L2:
12247 ...
12248
12249 ldr rn, L3
12250 ...
12251 b L4
12252 align
12253 L3: .long value
12254 L4:
12255 ...
12256
12257 We fix this by performing a scan after scheduling, which notices
12258 which instructions need to have their operands fetched from the
12259 constant table and builds the table.
12260
12261 The algorithm starts by building a table of all the constants that
12262 need fixing up and all the natural barriers in the function (places
12263 where a constant table can be dropped without breaking the flow).
12264 For each fixup we note how far the pc-relative replacement will be
12265 able to reach and the offset of the instruction into the function.
12266
12267 Having built the table we then group the fixes together to form
12268 tables that are as large as possible (subject to addressing
12269 constraints) and emit each table of constants after the last
12270 barrier that is within range of all the instructions in the group.
12271 If a group does not contain a barrier, then we forcibly create one
12272 by inserting a jump instruction into the flow. Once the table has
12273 been inserted, the insns are then modified to reference the
12274 relevant entry in the pool.
12275
12276 Possible enhancements to the algorithm (not implemented) are:
12277
12278 1) For some processors and object formats, there may be benefit in
12279 aligning the pools to the start of cache lines; this alignment
12280 would need to be taken into account when calculating addressability
12281 of a pool. */
12282
12283 /* These typedefs are located at the start of this file, so that
12284 they can be used in the prototypes there. This comment is to
12285 remind readers of that fact so that the following structures
12286 can be understood more easily.
12287
12288 typedef struct minipool_node Mnode;
12289 typedef struct minipool_fixup Mfix; */
12290
12291 struct minipool_node
12292 {
12293 /* Doubly linked chain of entries. */
12294 Mnode * next;
12295 Mnode * prev;
12296   /* The maximum offset into the code at which this entry can be placed.  While
12297 pushing fixes for forward references, all entries are sorted in order
12298 of increasing max_address. */
12299 HOST_WIDE_INT max_address;
12300 /* Similarly for an entry inserted for a backwards ref. */
12301 HOST_WIDE_INT min_address;
12302 /* The number of fixes referencing this entry. This can become zero
12303 if we "unpush" an entry. In this case we ignore the entry when we
12304 come to emit the code. */
12305 int refcount;
12306 /* The offset from the start of the minipool. */
12307 HOST_WIDE_INT offset;
12308   /* The value in the table.  */
12309 rtx value;
12310 /* The mode of value. */
12311 enum machine_mode mode;
12312 /* The size of the value. With iWMMXt enabled
12313      sizes > 4 also imply an alignment of 8 bytes.  */
12314 int fix_size;
12315 };
12316
12317 struct minipool_fixup
12318 {
12319 Mfix * next;
12320 rtx insn;
12321 HOST_WIDE_INT address;
12322 rtx * loc;
12323 enum machine_mode mode;
12324 int fix_size;
12325 rtx value;
12326 Mnode * minipool;
12327 HOST_WIDE_INT forwards;
12328 HOST_WIDE_INT backwards;
12329 };
12330
12331 /* Fixes less than a word need padding out to a word boundary. */
12332 #define MINIPOOL_FIX_SIZE(mode) \
12333 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
12334
12335 static Mnode * minipool_vector_head;
12336 static Mnode * minipool_vector_tail;
12337 static rtx minipool_vector_label;
12338 static int minipool_pad;
12339
12340 /* The linked list of all minipool fixes required for this function. */
12341 Mfix * minipool_fix_head;
12342 Mfix * minipool_fix_tail;
12343 /* The fix entry for the current minipool, once it has been placed. */
12344 Mfix * minipool_barrier;
12345
12346 /* Determines if INSN is the start of a jump table. Returns the end
12347 of the TABLE or NULL_RTX. */
12348 static rtx
12349 is_jump_table (rtx insn)
12350 {
12351 rtx table;
12352
12353 if (jump_to_label_p (insn)
12354 && ((table = next_real_insn (JUMP_LABEL (insn)))
12355 == next_real_insn (insn))
12356 && table != NULL
12357 && JUMP_P (table)
12358 && (GET_CODE (PATTERN (table)) == ADDR_VEC
12359 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
12360 return table;
12361
12362 return NULL_RTX;
12363 }
12364
12365 #ifndef JUMP_TABLES_IN_TEXT_SECTION
12366 #define JUMP_TABLES_IN_TEXT_SECTION 0
12367 #endif
12368
12369 static HOST_WIDE_INT
12370 get_jump_table_size (rtx insn)
12371 {
12372   /* ADDR_VECs only take room if read-only data goes into the text
12373 section. */
12374 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
12375 {
12376 rtx body = PATTERN (insn);
12377 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
12378 HOST_WIDE_INT size;
12379 HOST_WIDE_INT modesize;
12380
12381 modesize = GET_MODE_SIZE (GET_MODE (body));
12382 size = modesize * XVECLEN (body, elt);
12383 switch (modesize)
12384 {
12385 case 1:
12386 /* Round up size of TBB table to a halfword boundary. */
12387 size = (size + 1) & ~(HOST_WIDE_INT)1;
12388 break;
12389 case 2:
12390 /* No padding necessary for TBH. */
12391 break;
12392 case 4:
12393 /* Add two bytes for alignment on Thumb. */
12394 if (TARGET_THUMB)
12395 size += 2;
12396 break;
12397 default:
12398 gcc_unreachable ();
12399 }
12400 return size;
12401 }
12402
12403 return 0;
12404 }
12405
12406 /* Return the maximum amount of padding that will be inserted before
12407 label LABEL. */
12408
12409 static HOST_WIDE_INT
12410 get_label_padding (rtx label)
12411 {
12412 HOST_WIDE_INT align, min_insn_size;
12413
12414 align = 1 << label_to_alignment (label);
12415 min_insn_size = TARGET_THUMB ? 2 : 4;
12416 return align > min_insn_size ? align - min_insn_size : 0;
12417 }
12418
12419 /* Move a minipool fix MP from its current location to before MAX_MP.
12420 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
12421 constraints may need updating. */
12422 static Mnode *
12423 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
12424 HOST_WIDE_INT max_address)
12425 {
12426 /* The code below assumes these are different. */
12427 gcc_assert (mp != max_mp);
12428
12429 if (max_mp == NULL)
12430 {
12431 if (max_address < mp->max_address)
12432 mp->max_address = max_address;
12433 }
12434 else
12435 {
12436 if (max_address > max_mp->max_address - mp->fix_size)
12437 mp->max_address = max_mp->max_address - mp->fix_size;
12438 else
12439 mp->max_address = max_address;
12440
12441 /* Unlink MP from its current position. Since max_mp is non-null,
12442 mp->prev must be non-null. */
12443 mp->prev->next = mp->next;
12444 if (mp->next != NULL)
12445 mp->next->prev = mp->prev;
12446 else
12447 minipool_vector_tail = mp->prev;
12448
12449 /* Re-insert it before MAX_MP. */
12450 mp->next = max_mp;
12451 mp->prev = max_mp->prev;
12452 max_mp->prev = mp;
12453
12454 if (mp->prev != NULL)
12455 mp->prev->next = mp;
12456 else
12457 minipool_vector_head = mp;
12458 }
12459
12460 /* Save the new entry. */
12461 max_mp = mp;
12462
12463 /* Scan over the preceding entries and adjust their addresses as
12464 required. */
12465 while (mp->prev != NULL
12466 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12467 {
12468 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12469 mp = mp->prev;
12470 }
12471
12472 return max_mp;
12473 }
12474
12475 /* Add a constant to the minipool for a forward reference. Returns the
12476 node added or NULL if the constant will not fit in this pool. */
12477 static Mnode *
12478 add_minipool_forward_ref (Mfix *fix)
12479 {
12480 /* If set, max_mp is the first pool_entry that has a lower
12481 constraint than the one we are trying to add. */
12482 Mnode * max_mp = NULL;
12483 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
12484 Mnode * mp;
12485
12486 /* If the minipool starts before the end of FIX->INSN then this FIX
12487    cannot be placed into the current pool.  Furthermore, adding the
12488 new constant pool entry may cause the pool to start FIX_SIZE bytes
12489 earlier. */
12490 if (minipool_vector_head &&
12491 (fix->address + get_attr_length (fix->insn)
12492 >= minipool_vector_head->max_address - fix->fix_size))
12493 return NULL;
12494
12495 /* Scan the pool to see if a constant with the same value has
12496 already been added. While we are doing this, also note the
12497 location where we must insert the constant if it doesn't already
12498 exist. */
12499 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12500 {
12501 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12502 && fix->mode == mp->mode
12503 && (!LABEL_P (fix->value)
12504 || (CODE_LABEL_NUMBER (fix->value)
12505 == CODE_LABEL_NUMBER (mp->value)))
12506 && rtx_equal_p (fix->value, mp->value))
12507 {
12508 /* More than one fix references this entry. */
12509 mp->refcount++;
12510 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
12511 }
12512
12513 /* Note the insertion point if necessary. */
12514 if (max_mp == NULL
12515 && mp->max_address > max_address)
12516 max_mp = mp;
12517
12518 	  /* If we are inserting an 8-byte aligned quantity and
12519 we have not already found an insertion point, then
12520 make sure that all such 8-byte aligned quantities are
12521 placed at the start of the pool. */
12522 if (ARM_DOUBLEWORD_ALIGN
12523 && max_mp == NULL
12524 && fix->fix_size >= 8
12525 && mp->fix_size < 8)
12526 {
12527 max_mp = mp;
12528 max_address = mp->max_address;
12529 }
12530 }
12531
12532 /* The value is not currently in the minipool, so we need to create
12533 a new entry for it. If MAX_MP is NULL, the entry will be put on
12534 the end of the list since the placement is less constrained than
12535 any existing entry. Otherwise, we insert the new fix before
12536 MAX_MP and, if necessary, adjust the constraints on the other
12537 entries. */
12538 mp = XNEW (Mnode);
12539 mp->fix_size = fix->fix_size;
12540 mp->mode = fix->mode;
12541 mp->value = fix->value;
12542 mp->refcount = 1;
12543 /* Not yet required for a backwards ref. */
12544 mp->min_address = -65536;
12545
12546 if (max_mp == NULL)
12547 {
12548 mp->max_address = max_address;
12549 mp->next = NULL;
12550 mp->prev = minipool_vector_tail;
12551
12552 if (mp->prev == NULL)
12553 {
12554 minipool_vector_head = mp;
12555 minipool_vector_label = gen_label_rtx ();
12556 }
12557 else
12558 mp->prev->next = mp;
12559
12560 minipool_vector_tail = mp;
12561 }
12562 else
12563 {
12564 if (max_address > max_mp->max_address - mp->fix_size)
12565 mp->max_address = max_mp->max_address - mp->fix_size;
12566 else
12567 mp->max_address = max_address;
12568
12569 mp->next = max_mp;
12570 mp->prev = max_mp->prev;
12571 max_mp->prev = mp;
12572 if (mp->prev != NULL)
12573 mp->prev->next = mp;
12574 else
12575 minipool_vector_head = mp;
12576 }
12577
12578 /* Save the new entry. */
12579 max_mp = mp;
12580
12581 /* Scan over the preceding entries and adjust their addresses as
12582 required. */
12583 while (mp->prev != NULL
12584 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12585 {
12586 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12587 mp = mp->prev;
12588 }
12589
12590 return max_mp;
12591 }
12592
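/* Move a minipool fix MP so that it comes after MIN_MP, tightening its
   minimum-address constraint to MIN_ADDRESS, then recompute the offsets of
   all entries and propagate the constraints forward.  If MIN_MP is NULL,
   only MP's constraint is updated and it is not moved.  Returns the new
   MIN_MP.  (Comment added; summary of the code below.)  */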
12593 static Mnode *
12594 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
12595 HOST_WIDE_INT min_address)
12596 {
12597 HOST_WIDE_INT offset;
12598
12599 /* The code below assumes these are different. */
12600 gcc_assert (mp != min_mp);
12601
12602 if (min_mp == NULL)
12603 {
12604 if (min_address > mp->min_address)
12605 mp->min_address = min_address;
12606 }
12607 else
12608 {
12609 /* We will adjust this below if it is too loose. */
12610 mp->min_address = min_address;
12611
12612 /* Unlink MP from its current position. Since min_mp is non-null,
12613 mp->next must be non-null. */
12614 mp->next->prev = mp->prev;
12615 if (mp->prev != NULL)
12616 mp->prev->next = mp->next;
12617 else
12618 minipool_vector_head = mp->next;
12619
12620 /* Reinsert it after MIN_MP. */
12621 mp->prev = min_mp;
12622 mp->next = min_mp->next;
12623 min_mp->next = mp;
12624 if (mp->next != NULL)
12625 mp->next->prev = mp;
12626 else
12627 minipool_vector_tail = mp;
12628 }
12629
12630 min_mp = mp;
12631
12632 offset = 0;
12633 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12634 {
12635 mp->offset = offset;
12636 if (mp->refcount > 0)
12637 offset += mp->fix_size;
12638
12639 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
12640 mp->next->min_address = mp->min_address + mp->fix_size;
12641 }
12642
12643 return min_mp;
12644 }
12645
12646 /* Add a constant to the minipool for a backward reference. Returns the
12647 node added or NULL if the constant will not fit in this pool.
12648
12649 Note that the code for insertion for a backwards reference can be
12650 somewhat confusing because the calculated offsets for each fix do
12651 not take into account the size of the pool (which is still under
12652    construction).  */
12653 static Mnode *
12654 add_minipool_backward_ref (Mfix *fix)
12655 {
12656 /* If set, min_mp is the last pool_entry that has a lower constraint
12657 than the one we are trying to add. */
12658 Mnode *min_mp = NULL;
12659 /* This can be negative, since it is only a constraint. */
12660 HOST_WIDE_INT min_address = fix->address - fix->backwards;
12661 Mnode *mp;
12662
12663 /* If we can't reach the current pool from this insn, or if we can't
12664 insert this entry at the end of the pool without pushing other
12665 fixes out of range, then we don't try. This ensures that we
12666 can't fail later on. */
12667 if (min_address >= minipool_barrier->address
12668 || (minipool_vector_tail->min_address + fix->fix_size
12669 >= minipool_barrier->address))
12670 return NULL;
12671
12672 /* Scan the pool to see if a constant with the same value has
12673 already been added. While we are doing this, also note the
12674 location where we must insert the constant if it doesn't already
12675 exist. */
12676 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
12677 {
12678 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12679 && fix->mode == mp->mode
12680 && (!LABEL_P (fix->value)
12681 || (CODE_LABEL_NUMBER (fix->value)
12682 == CODE_LABEL_NUMBER (mp->value)))
12683 && rtx_equal_p (fix->value, mp->value)
12684 /* Check that there is enough slack to move this entry to the
12685 end of the table (this is conservative). */
12686 && (mp->max_address
12687 > (minipool_barrier->address
12688 + minipool_vector_tail->offset
12689 + minipool_vector_tail->fix_size)))
12690 {
12691 mp->refcount++;
12692 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
12693 }
12694
12695 if (min_mp != NULL)
12696 mp->min_address += fix->fix_size;
12697 else
12698 {
12699 /* Note the insertion point if necessary. */
12700 if (mp->min_address < min_address)
12701 {
12702 /* For now, we do not allow the insertion of nodes requiring 8-byte
12703 alignment anywhere but at the start of the pool. */
12704 if (ARM_DOUBLEWORD_ALIGN
12705 && fix->fix_size >= 8 && mp->fix_size < 8)
12706 return NULL;
12707 else
12708 min_mp = mp;
12709 }
12710 else if (mp->max_address
12711 < minipool_barrier->address + mp->offset + fix->fix_size)
12712 {
12713 /* Inserting before this entry would push the fix beyond
12714 its maximum address (which can happen if we have
12715 re-located a forwards fix); force the new fix to come
12716 after it. */
12717 if (ARM_DOUBLEWORD_ALIGN
12718 && fix->fix_size >= 8 && mp->fix_size < 8)
12719 return NULL;
12720 else
12721 {
12722 min_mp = mp;
12723 min_address = mp->min_address + fix->fix_size;
12724 }
12725 }
12726 /* Do not insert a non-8-byte aligned quantity before 8-byte
12727 aligned quantities. */
12728 else if (ARM_DOUBLEWORD_ALIGN
12729 && fix->fix_size < 8
12730 && mp->fix_size >= 8)
12731 {
12732 min_mp = mp;
12733 min_address = mp->min_address + fix->fix_size;
12734 }
12735 }
12736 }
12737
12738 /* We need to create a new entry. */
12739 mp = XNEW (Mnode);
12740 mp->fix_size = fix->fix_size;
12741 mp->mode = fix->mode;
12742 mp->value = fix->value;
12743 mp->refcount = 1;
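  /* The forward limit is never a real constraint for a backwards ref;
     place it well beyond the barrier.  */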
12744 mp->max_address = minipool_barrier->address + 65536;
12745
12746 mp->min_address = min_address;
12747
12748 if (min_mp == NULL)
12749 {
12750 mp->prev = NULL;
12751 mp->next = minipool_vector_head;
12752
12753 if (mp->next == NULL)
12754 {
12755 minipool_vector_tail = mp;
12756 minipool_vector_label = gen_label_rtx ();
12757 }
12758 else
12759 mp->next->prev = mp;
12760
12761 minipool_vector_head = mp;
12762 }
12763 else
12764 {
12765 mp->next = min_mp->next;
12766 mp->prev = min_mp;
12767 min_mp->next = mp;
12768
12769 if (mp->next != NULL)
12770 mp->next->prev = mp;
12771 else
12772 minipool_vector_tail = mp;
12773 }
12774
12775 /* Save the new entry. */
12776 min_mp = mp;
12777
12778 if (mp->prev)
12779 mp = mp->prev;
12780 else
12781 mp->offset = 0;
12782
12783 /* Scan over the following entries and adjust their offsets. */
12784 while (mp->next != NULL)
12785 {
12786 if (mp->next->min_address < mp->min_address + mp->fix_size)
12787 mp->next->min_address = mp->min_address + mp->fix_size;
12788
12789 if (mp->refcount)
12790 mp->next->offset = mp->offset + mp->fix_size;
12791 else
12792 mp->next->offset = mp->offset;
12793
12794 mp = mp->next;
12795 }
12796
12797 return min_mp;
12798 }
12799
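/* Record BARRIER as the barrier terminating the current minipool and
   assign an offset within the pool to each entry that is still
   referenced.  */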
12800 static void
12801 assign_minipool_offsets (Mfix *barrier)
12802 {
12803 HOST_WIDE_INT offset = 0;
12804 Mnode *mp;
12805
12806 minipool_barrier = barrier;
12807
12808 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12809 {
12810 mp->offset = offset;
12811
12812 if (mp->refcount > 0)
12813 offset += mp->fix_size;
12814 }
12815 }
12816
12817 /* Output the literal table, freeing the pool entries as we go.  */
12818 static void
12819 dump_minipool (rtx scan)
12820 {
12821 Mnode * mp;
12822 Mnode * nmp;
12823 int align64 = 0;
12824
12825 if (ARM_DOUBLEWORD_ALIGN)
12826 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12827 if (mp->refcount > 0 && mp->fix_size >= 8)
12828 {
12829 align64 = 1;
12830 break;
12831 }
12832
12833 if (dump_file)
12834 fprintf (dump_file,
12835 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
12836 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
12837
12838 scan = emit_label_after (gen_label_rtx (), scan);
12839 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
12840 scan = emit_label_after (minipool_vector_label, scan);
12841
12842 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
12843 {
12844 if (mp->refcount > 0)
12845 {
12846 if (dump_file)
12847 {
12848 fprintf (dump_file,
12849 ";; Offset %u, min %ld, max %ld ",
12850 (unsigned) mp->offset, (unsigned long) mp->min_address,
12851 (unsigned long) mp->max_address);
12852 arm_print_value (dump_file, mp->value);
12853 fputc ('\n', dump_file);
12854 }
12855
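	  /* Emit the entry using the consttable pattern matching its size.  */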
12856 switch (mp->fix_size)
12857 {
12858 #ifdef HAVE_consttable_1
12859 case 1:
12860 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
12861 break;
12862
12863 #endif
12864 #ifdef HAVE_consttable_2
12865 case 2:
12866 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
12867 break;
12868
12869 #endif
12870 #ifdef HAVE_consttable_4
12871 case 4:
12872 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
12873 break;
12874
12875 #endif
12876 #ifdef HAVE_consttable_8
12877 case 8:
12878 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
12879 break;
12880
12881 #endif
12882 #ifdef HAVE_consttable_16
12883 case 16:
12884 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
12885 break;
12886
12887 #endif
12888 default:
12889 gcc_unreachable ();
12890 }
12891 }
12892
12893 nmp = mp->next;
12894 free (mp);
12895 }
12896
12897 minipool_vector_head = minipool_vector_tail = NULL;
12898 scan = emit_insn_after (gen_consttable_end (), scan);
12899 scan = emit_barrier_after (scan);
12900 }
12901
12902 /* Return the cost of forcibly inserting a barrier after INSN. */
12903 static int
12904 arm_barrier_cost (rtx insn)
12905 {
12906 /* Basing the location of the pool on the loop depth is preferable,
12907 but at the moment, the basic block information seems to be
12908 corrupted by this stage of the compilation. */
12909 int base_cost = 50;
12910 rtx next = next_nonnote_insn (insn);
12911
12912 if (next != NULL && LABEL_P (next))
12913 base_cost -= 20;
12914
12915 switch (GET_CODE (insn))
12916 {
12917 case CODE_LABEL:
12918 /* It will always be better to place the table before the label, rather
12919 than after it. */
12920 return 50;
12921
12922 case INSN:
12923 case CALL_INSN:
12924 return base_cost;
12925
12926 case JUMP_INSN:
12927 return base_cost - 10;
12928
12929 default:
12930 return base_cost + 10;
12931 }
12932 }
12933
12934 /* Find the best place in the insn stream in the range
12935 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
12936 Create the barrier by inserting a jump, and add a new fix entry for
12937 it. */
12938 static Mfix *
12939 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
12940 {
12941 HOST_WIDE_INT count = 0;
12942 rtx barrier;
12943 rtx from = fix->insn;
12944 /* The instruction after which we will insert the jump. */
12945 rtx selected = NULL;
12946 int selected_cost;
12947 /* The address at which the jump instruction will be placed. */
12948 HOST_WIDE_INT selected_address;
12949 Mfix * new_fix;
12950 HOST_WIDE_INT max_count = max_address - fix->address;
12951 rtx label = gen_label_rtx ();
12952
12953 selected_cost = arm_barrier_cost (from);
12954 selected_address = fix->address;
12955
12956 while (from && count < max_count)
12957 {
12958 rtx tmp;
12959 int new_cost;
12960
12961 /* This code shouldn't have been called if there was a natural barrier
12962 within range. */
12963 gcc_assert (!BARRIER_P (from));
12964
12965 /* Count the length of this insn. This must stay in sync with the
12966 code that pushes minipool fixes. */
12967 if (LABEL_P (from))
12968 count += get_label_padding (from);
12969 else
12970 count += get_attr_length (from);
12971
12972 /* If there is a jump table, add its length. */
12973 tmp = is_jump_table (from);
12974 if (tmp != NULL)
12975 {
12976 count += get_jump_table_size (tmp);
12977
12978 /* Jump tables aren't in a basic block, so base the cost on
12979 the dispatch insn. If we select this location, we will
12980 still put the pool after the table. */
12981 new_cost = arm_barrier_cost (from);
12982
12983 if (count < max_count
12984 && (!selected || new_cost <= selected_cost))
12985 {
12986 selected = tmp;
12987 selected_cost = new_cost;
12988 selected_address = fix->address + count;
12989 }
12990
12991 /* Continue after the dispatch table. */
12992 from = NEXT_INSN (tmp);
12993 continue;
12994 }
12995
12996 new_cost = arm_barrier_cost (from);
12997
12998 if (count < max_count
12999 && (!selected || new_cost <= selected_cost))
13000 {
13001 selected = from;
13002 selected_cost = new_cost;
13003 selected_address = fix->address + count;
13004 }
13005
13006 from = NEXT_INSN (from);
13007 }
13008
13009 /* Make sure that we found a place to insert the jump. */
13010 gcc_assert (selected);
13011
13012 /* Make sure we do not split a call and its corresponding
13013 CALL_ARG_LOCATION note. */
13014 if (CALL_P (selected))
13015 {
13016 rtx next = NEXT_INSN (selected);
13017 if (next && NOTE_P (next)
13018 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
13019 selected = next;
13020 }
13021
13022 /* Create a new JUMP_INSN that branches around a barrier. */
13023 from = emit_jump_insn_after (gen_jump (label), selected);
13024 JUMP_LABEL (from) = label;
13025 barrier = emit_barrier_after (from);
13026 emit_label_after (label, barrier);
13027
13028 /* Create a minipool barrier entry for the new barrier. */
13029 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
13030 new_fix->insn = barrier;
13031 new_fix->address = selected_address;
13032 new_fix->next = fix->next;
13033 fix->next = new_fix;
13034
13035 return new_fix;
13036 }
13037
13038 /* Record that there is a natural barrier in the insn stream at
13039 ADDRESS. */
13040 static void
13041 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
13042 {
13043 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13044
13045 fix->insn = insn;
13046 fix->address = address;
13047
13048 fix->next = NULL;
13049 if (minipool_fix_head != NULL)
13050 minipool_fix_tail->next = fix;
13051 else
13052 minipool_fix_head = fix;
13053
13054 minipool_fix_tail = fix;
13055 }
13056
13057 /* Record INSN, which will need fixing up to load a value from the
13058 minipool. ADDRESS is the offset of the insn since the start of the
13059 function; LOC is a pointer to the part of the insn which requires
13060 fixing; VALUE is the constant that must be loaded, which is of type
13061 MODE. */
13062 static void
13063 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
13064 enum machine_mode mode, rtx value)
13065 {
13066 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13067
13068 fix->insn = insn;
13069 fix->address = address;
13070 fix->loc = loc;
13071 fix->mode = mode;
13072 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
13073 fix->value = value;
13074 fix->forwards = get_attr_pool_range (insn);
13075 fix->backwards = get_attr_neg_pool_range (insn);
13076 fix->minipool = NULL;
13077
13078 /* If an insn doesn't have a range defined for it, then it isn't
13079 expecting to be reworked by this code. Better to stop now than
13080 to generate duff assembly code. */
13081 gcc_assert (fix->forwards || fix->backwards);
13082
13083 /* If an entry requires 8-byte alignment then assume all constant pools
13084 require 4 bytes of padding. Trying to do this later on a per-pool
13085 basis is awkward because existing pool entries have to be modified. */
13086 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
13087 minipool_pad = 4;
13088
13089 if (dump_file)
13090 {
13091 fprintf (dump_file,
13092 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
13093 GET_MODE_NAME (mode),
13094 INSN_UID (insn), (unsigned long) address,
13095 -1 * (long)fix->backwards, (long)fix->forwards);
13096 arm_print_value (dump_file, fix->value);
13097 fprintf (dump_file, "\n");
13098 }
13099
13100 /* Add it to the chain of fixes. */
13101 fix->next = NULL;
13102
13103 if (minipool_fix_head != NULL)
13104 minipool_fix_tail->next = fix;
13105 else
13106 minipool_fix_head = fix;
13107
13108 minipool_fix_tail = fix;
13109 }
13110
13111 /* Return the cost of synthesizing a 64-bit constant VAL inline.
13112 Returns the number of insns needed, or 99 if we don't know how to
13113 do it. */
13114 int
13115 arm_const_double_inline_cost (rtx val)
13116 {
13117 rtx lowpart, highpart;
13118 enum machine_mode mode;
13119
13120 mode = GET_MODE (val);
13121
13122 if (mode == VOIDmode)
13123 mode = DImode;
13124
13125 gcc_assert (GET_MODE_SIZE (mode) == 8);
13126
13127 lowpart = gen_lowpart (SImode, val);
13128 highpart = gen_highpart_mode (SImode, mode, val);
13129
13130 gcc_assert (CONST_INT_P (lowpart));
13131 gcc_assert (CONST_INT_P (highpart));
13132
13133 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
13134 NULL_RTX, NULL_RTX, 0, 0)
13135 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
13136 NULL_RTX, NULL_RTX, 0, 0));
13137 }
13138
13139 /* Return true if it is worthwhile to split a 64-bit constant into two
13140 32-bit operations. This is the case if optimizing for size, or
13141 if we have load delay slots, or if one 32-bit part can be done with
13142 a single data operation. */
13143 bool
13144 arm_const_double_by_parts (rtx val)
13145 {
13146 enum machine_mode mode = GET_MODE (val);
13147 rtx part;
13148
13149 if (optimize_size || arm_ld_sched)
13150 return true;
13151
13152 if (mode == VOIDmode)
13153 mode = DImode;
13154
13155 part = gen_highpart_mode (SImode, mode, val);
13156
13157 gcc_assert (CONST_INT_P (part));
13158
13159 if (const_ok_for_arm (INTVAL (part))
13160 || const_ok_for_arm (~INTVAL (part)))
13161 return true;
13162
13163 part = gen_lowpart (SImode, val);
13164
13165 gcc_assert (CONST_INT_P (part));
13166
13167 if (const_ok_for_arm (INTVAL (part))
13168 || const_ok_for_arm (~INTVAL (part)))
13169 return true;
13170
13171 return false;
13172 }
13173
13174 /* Return true if it is possible to inline both the high and low parts
13175 of a 64-bit constant into 32-bit data processing instructions. */
13176 bool
13177 arm_const_double_by_immediates (rtx val)
13178 {
13179 enum machine_mode mode = GET_MODE (val);
13180 rtx part;
13181
13182 if (mode == VOIDmode)
13183 mode = DImode;
13184
13185 part = gen_highpart_mode (SImode, mode, val);
13186
13187 gcc_assert (CONST_INT_P (part));
13188
13189 if (!const_ok_for_arm (INTVAL (part)))
13190 return false;
13191
13192 part = gen_lowpart (SImode, val);
13193
13194 gcc_assert (CONST_INT_P (part));
13195
13196 if (!const_ok_for_arm (INTVAL (part)))
13197 return false;
13198
13199 return true;
13200 }
13201
13202 /* Scan INSN and note any of its operands that need fixing.
13203 If DO_PUSHES is false we do not actually push any of the fixups
13204 needed. */
13205 static void
13206 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
13207 {
13208 int opno;
13209
13210 extract_insn (insn);
13211
13212 if (!constrain_operands (1))
13213 fatal_insn_not_found (insn);
13214
13215 if (recog_data.n_alternatives == 0)
13216 return;
13217
13218 /* Fill in recog_op_alt with information about the constraints of
13219 this insn. */
13220 preprocess_constraints ();
13221
13222 for (opno = 0; opno < recog_data.n_operands; opno++)
13223 {
13224 /* Things we need to fix can only occur in inputs. */
13225 if (recog_data.operand_type[opno] != OP_IN)
13226 continue;
13227
13228 /* If this alternative is a memory reference, then any mention
13229 of constants in this alternative is really to fool reload
13230 into allowing us to accept one there. We need to fix them up
13231 now so that we output the right code. */
13232 if (recog_op_alt[opno][which_alternative].memory_ok)
13233 {
13234 rtx op = recog_data.operand[opno];
13235
13236 if (CONSTANT_P (op))
13237 {
13238 if (do_pushes)
13239 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
13240 recog_data.operand_mode[opno], op);
13241 }
13242 else if (MEM_P (op)
13243 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
13244 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
13245 {
13246 if (do_pushes)
13247 {
13248 rtx cop = avoid_constant_pool_reference (op);
13249
13250 /* Casting the address of something to a mode narrower
13251 than a word can cause avoid_constant_pool_reference()
13252 to return the pool reference itself. That's no good to
13253 us here. Let's just hope that we can use the
13254 constant pool value directly. */
13255 if (op == cop)
13256 cop = get_pool_constant (XEXP (op, 0));
13257
13258 push_minipool_fix (insn, address,
13259 recog_data.operand_loc[opno],
13260 recog_data.operand_mode[opno], cop);
13261 }
13262
13263 }
13264 }
13265 }
13266
13267 return;
13268 }
13269
13270 /* Convert instructions to their cc-clobbering variant if possible, since
13271 that allows us to use smaller encodings. */
13272
13273 static void
13274 thumb2_reorg (void)
13275 {
13276 basic_block bb;
13277 regset_head live;
13278
13279 INIT_REG_SET (&live);
13280
13281 /* We are freeing block_for_insn in the toplev to keep compatibility
13282 with old MDEP_REORGS that are not CFG based. Recompute it now. */
13283 compute_bb_for_insn ();
13284 df_analyze ();
13285
13286 FOR_EACH_BB (bb)
13287 {
13288 rtx insn;
13289
13290 COPY_REG_SET (&live, DF_LR_OUT (bb));
13291 df_simulate_initialize_backwards (bb, &live);
13292 FOR_BB_INSNS_REVERSE (bb, insn)
13293 {
13294 if (NONJUMP_INSN_P (insn)
13295 && !REGNO_REG_SET_P (&live, CC_REGNUM)
13296 && GET_CODE (PATTERN (insn)) == SET)
13297 {
13298 enum {SKIP, CONV, SWAP_CONV} action = SKIP;
13299 rtx pat = PATTERN (insn);
13300 rtx dst = XEXP (pat, 0);
13301 rtx src = XEXP (pat, 1);
13302 rtx op0 = NULL_RTX, op1 = NULL_RTX;
13303
13304 if (!OBJECT_P (src))
13305 op0 = XEXP (src, 0);
13306
13307 if (BINARY_P (src))
13308 op1 = XEXP (src, 1);
13309
13310 if (low_register_operand (dst, SImode))
13311 {
13312 switch (GET_CODE (src))
13313 {
13314 case PLUS:
13315 /* Adding two registers and storing the result
13316 in the first source is already a 16-bit
13317 operation. */
13318 if (rtx_equal_p (dst, op0)
13319 && register_operand (op1, SImode))
13320 break;
13321
13322 if (low_register_operand (op0, SImode))
13323 {
13324 /* ADDS <Rd>,<Rn>,<Rm> */
13325 if (low_register_operand (op1, SImode))
13326 action = CONV;
13327 /* ADDS <Rdn>,#<imm8> */
13328 /* SUBS <Rdn>,#<imm8> */
13329 else if (rtx_equal_p (dst, op0)
13330 && CONST_INT_P (op1)
13331 && IN_RANGE (INTVAL (op1), -255, 255))
13332 action = CONV;
13333 /* ADDS <Rd>,<Rn>,#<imm3> */
13334 /* SUBS <Rd>,<Rn>,#<imm3> */
13335 else if (CONST_INT_P (op1)
13336 && IN_RANGE (INTVAL (op1), -7, 7))
13337 action = CONV;
13338 }
13339 break;
13340
13341 case MINUS:
13342 /* RSBS <Rd>,<Rn>,#0
13343 Not handled here: see NEG below. */
13344 /* SUBS <Rd>,<Rn>,#<imm3>
13345 SUBS <Rdn>,#<imm8>
13346 Not handled here: see PLUS above. */
13347 /* SUBS <Rd>,<Rn>,<Rm> */
13348 if (low_register_operand (op0, SImode)
13349 && low_register_operand (op1, SImode))
13350 action = CONV;
13351 break;
13352
13353 case MULT:
13354 /* MULS <Rdm>,<Rn>,<Rdm>
13355 As an exception to the rule, this is only used
13356 when optimizing for size since MULS is slow on all
13357 known implementations. We do not even want to use
13358 MULS in cold code, if optimizing for speed, so we
13359 test the global flag here. */
13360 if (!optimize_size)
13361 break;
13362 /* else fall through. */
13363 case AND:
13364 case IOR:
13365 case XOR:
13366 /* ANDS <Rdn>,<Rm> */
13367 if (rtx_equal_p (dst, op0)
13368 && low_register_operand (op1, SImode))
13369 action = CONV;
13370 else if (rtx_equal_p (dst, op1)
13371 && low_register_operand (op0, SImode))
13372 action = SWAP_CONV;
13373 break;
13374
13375 case ASHIFTRT:
13376 case ASHIFT:
13377 case LSHIFTRT:
13378 /* ASRS <Rdn>,<Rm> */
13379 /* LSRS <Rdn>,<Rm> */
13380 /* LSLS <Rdn>,<Rm> */
13381 if (rtx_equal_p (dst, op0)
13382 && low_register_operand (op1, SImode))
13383 action = CONV;
13384 /* ASRS <Rd>,<Rm>,#<imm5> */
13385 /* LSRS <Rd>,<Rm>,#<imm5> */
13386 /* LSLS <Rd>,<Rm>,#<imm5> */
13387 else if (low_register_operand (op0, SImode)
13388 && CONST_INT_P (op1)
13389 && IN_RANGE (INTVAL (op1), 0, 31))
13390 action = CONV;
13391 break;
13392
13393 case ROTATERT:
13394 /* RORS <Rdn>,<Rm> */
13395 if (rtx_equal_p (dst, op0)
13396 && low_register_operand (op1, SImode))
13397 action = CONV;
13398 break;
13399
13400 case NOT:
13401 case NEG:
13402 /* MVNS <Rd>,<Rm> */
13403 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
13404 if (low_register_operand (op0, SImode))
13405 action = CONV;
13406 break;
13407
13408 case CONST_INT:
13409 /* MOVS <Rd>,#<imm8> */
13410 if (CONST_INT_P (src)
13411 && IN_RANGE (INTVAL (src), 0, 255))
13412 action = CONV;
13413 break;
13414
13415 case REG:
13416 /* MOVS and MOV<c> with registers have different
13417 encodings, so are not relevant here. */
13418 break;
13419
13420 default:
13421 break;
13422 }
13423 }
13424
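	  /* Rewrite the SET as a PARALLEL of the (possibly operand-swapped)
	     SET and a clobber of the condition-code register, so that the
	     flag-setting 16-bit encoding can be selected.  */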
13425 if (action != SKIP)
13426 {
13427 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13428 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13429 rtvec vec;
13430
13431 if (action == SWAP_CONV)
13432 {
13433 src = copy_rtx (src);
13434 XEXP (src, 0) = op1;
13435 XEXP (src, 1) = op0;
13436 pat = gen_rtx_SET (VOIDmode, dst, src);
13437 vec = gen_rtvec (2, pat, clobber);
13438 }
13439 else /* action == CONV */
13440 vec = gen_rtvec (2, pat, clobber);
13441
13442 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13443 INSN_CODE (insn) = -1;
13444 }
13445 }
13446
13447 if (NONDEBUG_INSN_P (insn))
13448 df_simulate_one_insn_backwards (bb, insn, &live);
13449 }
13450 }
13451
13452 CLEAR_REG_SET (&live);
13453 }
13454
13455 /* GCC puts the pool in the wrong place for ARM, since we can only
13456 load addresses from a limited distance around the pc. We do some
13457 special munging to move the constant pool values to the correct
13458 point in the code. */
13459 static void
13460 arm_reorg (void)
13461 {
13462 rtx insn;
13463 HOST_WIDE_INT address = 0;
13464 Mfix * fix;
13465
13466 if (TARGET_THUMB2)
13467 thumb2_reorg ();
13468
13469 /* Ensure all insns that must be split have been split at this point.
13470 Otherwise, the pool placement code below may compute incorrect
13471 insn lengths. Note that when optimizing, all insns have already
13472 been split at this point. */
13473 if (!optimize)
13474 split_all_insns_noflow ();
13475
13476 minipool_fix_head = minipool_fix_tail = NULL;
13477
13478 /* The first insn must always be a note, or the code below won't
13479 scan it properly. */
13480 insn = get_insns ();
13481 gcc_assert (NOTE_P (insn));
13482 minipool_pad = 0;
13483
13484 /* Scan all the insns and record the operands that will need fixing. */
13485 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
13486 {
13487 if (BARRIER_P (insn))
13488 push_minipool_barrier (insn, address);
13489 else if (INSN_P (insn))
13490 {
13491 rtx table;
13492
13493 note_invalid_constants (insn, address, true);
13494 address += get_attr_length (insn);
13495
13496 /* If the insn is a vector jump, add the size of the table
13497 and skip the table. */
13498 if ((table = is_jump_table (insn)) != NULL)
13499 {
13500 address += get_jump_table_size (table);
13501 insn = table;
13502 }
13503 }
13504 else if (LABEL_P (insn))
13505 /* Add the worst-case padding due to alignment. We don't add
13506 the _current_ padding because the minipool insertions
13507 themselves might change it. */
13508 address += get_label_padding (insn);
13509 }
13510
13511 fix = minipool_fix_head;
13512
13513 /* Now scan the fixups and perform the required changes. */
13514 while (fix)
13515 {
13516 Mfix * ftmp;
13517 Mfix * fdel;
13518 Mfix * last_added_fix;
13519 Mfix * last_barrier = NULL;
13520 Mfix * this_fix;
13521
13522 /* Skip any further barriers before the next fix. */
13523 while (fix && BARRIER_P (fix->insn))
13524 fix = fix->next;
13525
13526 /* No more fixes. */
13527 if (fix == NULL)
13528 break;
13529
13530 last_added_fix = NULL;
13531
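      /* Accumulate fixes into the current pool until one no longer fits or
	 a barrier beyond the pool's reach is found.  */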
13532 for (ftmp = fix; ftmp; ftmp = ftmp->next)
13533 {
13534 if (BARRIER_P (ftmp->insn))
13535 {
13536 if (ftmp->address >= minipool_vector_head->max_address)
13537 break;
13538
13539 last_barrier = ftmp;
13540 }
13541 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
13542 break;
13543
13544 last_added_fix = ftmp; /* Keep track of the last fix added. */
13545 }
13546
13547 /* If we found a barrier, drop back to that; any fixes that we
13548 could have reached but come after the barrier will now go in
13549 the next mini-pool. */
13550 if (last_barrier != NULL)
13551 {
13552 /* Reduce the refcount for those fixes that won't go into this
13553 pool after all. */
13554 for (fdel = last_barrier->next;
13555 fdel && fdel != ftmp;
13556 fdel = fdel->next)
13557 {
13558 fdel->minipool->refcount--;
13559 fdel->minipool = NULL;
13560 }
13561
13562 ftmp = last_barrier;
13563 }
13564 else
13565 {
13566 /* ftmp is the first fix that we can't fit into this pool and
13567 there are no natural barriers that we could use. Insert a
13568 new barrier in the code somewhere between the previous
13569 fix and this one, and arrange to jump around it. */
13570 HOST_WIDE_INT max_address;
13571
13572 /* The last item on the list of fixes must be a barrier, so
13573 we can never run off the end of the list of fixes without
13574 last_barrier being set. */
13575 gcc_assert (ftmp);
13576
13577 max_address = minipool_vector_head->max_address;
13578 /* Check that there isn't another fix that is in range that
13579 we couldn't fit into this pool because the pool was
13580 already too large: we need to put the pool before such an
13581 instruction. The pool itself may come just after the
13582 fix because create_fix_barrier also allows space for a
13583 jump instruction. */
13584 if (ftmp->address < max_address)
13585 max_address = ftmp->address + 1;
13586
13587 last_barrier = create_fix_barrier (last_added_fix, max_address);
13588 }
13589
13590 assign_minipool_offsets (last_barrier);
13591
13592 while (ftmp)
13593 {
13594 if (!BARRIER_P (ftmp->insn)
13595 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
13596 == NULL))
13597 break;
13598
13599 ftmp = ftmp->next;
13600 }
13601
13602 /* Scan over the fixes we have identified for this pool, fixing them
13603 up and adding the constants to the pool itself. */
13604 for (this_fix = fix; this_fix && ftmp != this_fix;
13605 this_fix = this_fix->next)
13606 if (!BARRIER_P (this_fix->insn))
13607 {
13608 rtx addr
13609 = plus_constant (Pmode,
13610 gen_rtx_LABEL_REF (VOIDmode,
13611 minipool_vector_label),
13612 this_fix->minipool->offset);
13613 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
13614 }
13615
13616 dump_minipool (last_barrier->insn);
13617 fix = ftmp;
13618 }
13619
13620 /* From now on we must synthesize any constants that we can't handle
13621 directly. This can happen if the RTL gets split during final
13622 instruction generation. */
13623 after_arm_reorg = 1;
13624
13625 /* Free the minipool memory. */
13626 obstack_free (&minipool_obstack, minipool_startobj);
13627 }
13628 \f
13629 /* Routines to output assembly language. */
13630
13631 /* If the rtx is the correct value then return the string of the number.
13632 In this way we can ensure that valid double constants are generated even
13633 when cross compiling. */
13634 const char *
13635 fp_immediate_constant (rtx x)
13636 {
13637 REAL_VALUE_TYPE r;
13638
13639 if (!fp_consts_inited)
13640 init_fp_table ();
13641
13642 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13643
13644 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
13645 return "0";
13646 }
13647
13648 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
13649 static const char *
13650 fp_const_from_val (REAL_VALUE_TYPE *r)
13651 {
13652 if (!fp_consts_inited)
13653 init_fp_table ();
13654
13655 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
13656 return "0";
13657 }
13658
13659 /* OPERANDS[0] is the entire list of insns that constitute the pop,
13660 OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
13661 is in the list, and UPDATE is true iff the list contains an explicit
13662 update of the base register. */
13663 void
13664 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
13665 bool update)
13666 {
13667 int i;
13668 char pattern[100];
13669 int offset;
13670 const char *conditional;
13671 int num_saves = XVECLEN (operands[0], 0);
13672 unsigned int regno;
13673 unsigned int regno_base = REGNO (operands[1]);
13674
13675 offset = 0;
13676 offset += update ? 1 : 0;
13677 offset += return_pc ? 1 : 0;
13678
13679 /* Is the base register in the list? */
13680 for (i = offset; i < num_saves; i++)
13681 {
13682 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
13683 /* If SP is in the list, then the base register must be SP. */
13684 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
13685 /* If base register is in the list, there must be no explicit update. */
13686 if (regno == regno_base)
13687 gcc_assert (!update);
13688 }
13689
13690 conditional = reverse ? "%?%D0" : "%?%d0";
13691 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
13692 {
13693 /* Output pop (not ldmfd) because it has a shorter encoding. */
13694 gcc_assert (update);
13695 sprintf (pattern, "pop%s\t{", conditional);
13696 }
13697 else
13698 {
13699 /* Output ldmfd when the base register is SP, otherwise output ldmia.
13700 It's just a convention; their semantics are identical. */
13701 if (regno_base == SP_REGNUM)
13702 sprintf (pattern, "ldm%sfd\t", conditional);
13703 else if (TARGET_UNIFIED_ASM)
13704 sprintf (pattern, "ldmia%s\t", conditional);
13705 else
13706 sprintf (pattern, "ldm%sia\t", conditional);
13707
13708 strcat (pattern, reg_names[regno_base]);
13709 if (update)
13710 strcat (pattern, "!, {");
13711 else
13712 strcat (pattern, ", {");
13713 }
13714
13715 /* Output the first destination register. */
13716 strcat (pattern,
13717 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
13718
13719 /* Output the rest of the destination registers. */
13720 for (i = offset + 1; i < num_saves; i++)
13721 {
13722 strcat (pattern, ", ");
13723 strcat (pattern,
13724 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
13725 }
13726
13727 strcat (pattern, "}");
13728
13729 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
13730 strcat (pattern, "^");
13731
13732 output_asm_insn (pattern, &cond);
13733 }
13734
13735
13736 /* Output the assembly for a store multiple. */
13737
13738 const char *
13739 vfp_output_fstmd (rtx * operands)
13740 {
13741 char pattern[100];
13742 int p;
13743 int base;
13744 int i;
13745
13746 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
13747 p = strlen (pattern);
13748
13749 gcc_assert (REG_P (operands[1]));
13750
13751 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
13752 for (i = 1; i < XVECLEN (operands[2], 0); i++)
13753 {
13754 p += sprintf (&pattern[p], ", d%d", base + i);
13755 }
13756 strcpy (&pattern[p], "}");
13757
13758 output_asm_insn (pattern, operands);
13759 return "";
13760 }
13761
13762
13763 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
13764 number of bytes pushed. */
13765
13766 static int
13767 vfp_emit_fstmd (int base_reg, int count)
13768 {
13769 rtx par;
13770 rtx dwarf;
13771 rtx tmp, reg;
13772 int i;
13773
13774 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
13775 register pairs are stored by a store multiple insn. We avoid this
13776 by pushing an extra pair. */
13777 if (count == 2 && !arm_arch6)
13778 {
13779 if (base_reg == LAST_VFP_REGNUM - 3)
13780 base_reg -= 2;
13781 count++;
13782 }
13783
13784 /* FSTMD may not store more than 16 doubleword registers at once. Split
13785 larger stores into multiple parts (up to a maximum of two, in
13786 practice). */
13787 if (count > 16)
13788 {
13789 int saved;
13790 /* NOTE: base_reg is an internal register number, so each D register
13791 counts as 2. */
13792 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
13793 saved += vfp_emit_fstmd (base_reg, 16);
13794 return saved;
13795 }
13796
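  /* Build the store-multiple pattern itself and, alongside it, a
     REG_FRAME_RELATED_EXPR note describing the stack adjustment and the
     individual register stores for the unwinder.  */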
13797 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
13798 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
13799
13800 reg = gen_rtx_REG (DFmode, base_reg);
13801 base_reg += 2;
13802
13803 XVECEXP (par, 0, 0)
13804 = gen_rtx_SET (VOIDmode,
13805 gen_frame_mem
13806 (BLKmode,
13807 gen_rtx_PRE_MODIFY (Pmode,
13808 stack_pointer_rtx,
13809 plus_constant
13810 (Pmode, stack_pointer_rtx,
13811 - (count * 8)))
13812 ),
13813 gen_rtx_UNSPEC (BLKmode,
13814 gen_rtvec (1, reg),
13815 UNSPEC_PUSH_MULT));
13816
13817 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13818 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
13819 RTX_FRAME_RELATED_P (tmp) = 1;
13820 XVECEXP (dwarf, 0, 0) = tmp;
13821
13822 tmp = gen_rtx_SET (VOIDmode,
13823 gen_frame_mem (DFmode, stack_pointer_rtx),
13824 reg);
13825 RTX_FRAME_RELATED_P (tmp) = 1;
13826 XVECEXP (dwarf, 0, 1) = tmp;
13827
13828 for (i = 1; i < count; i++)
13829 {
13830 reg = gen_rtx_REG (DFmode, base_reg);
13831 base_reg += 2;
13832 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
13833
13834 tmp = gen_rtx_SET (VOIDmode,
13835 gen_frame_mem (DFmode,
13836 plus_constant (Pmode,
13837 stack_pointer_rtx,
13838 i * 8)),
13839 reg);
13840 RTX_FRAME_RELATED_P (tmp) = 1;
13841 XVECEXP (dwarf, 0, i + 1) = tmp;
13842 }
13843
13844 par = emit_insn (par);
13845 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
13846 RTX_FRAME_RELATED_P (par) = 1;
13847
13848 return count * 8;
13849 }
13850
13851 /* Emit a call instruction with pattern PAT. ADDR is the address of
13852 the call target. */
13853
13854 void
13855 arm_emit_call_insn (rtx pat, rtx addr)
13856 {
13857 rtx insn;
13858
13859 insn = emit_call_insn (pat);
13860
13861 /* The PIC register is live on entry to VxWorks PIC PLT entries.
13862 If the call might use such an entry, add a use of the PIC register
13863 to the instruction's CALL_INSN_FUNCTION_USAGE. */
13864 if (TARGET_VXWORKS_RTP
13865 && flag_pic
13866 && GET_CODE (addr) == SYMBOL_REF
13867 && (SYMBOL_REF_DECL (addr)
13868 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
13869 : !SYMBOL_REF_LOCAL_P (addr)))
13870 {
13871 require_pic_register ();
13872 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
13873 }
13874 }
13875
13876 /* Output a 'call' insn. */
13877 const char *
13878 output_call (rtx *operands)
13879 {
13880 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
13881
13882 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
13883 if (REGNO (operands[0]) == LR_REGNUM)
13884 {
13885 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
13886 output_asm_insn ("mov%?\t%0, %|lr", operands);
13887 }
13888
13889 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13890
13891 if (TARGET_INTERWORK || arm_arch4t)
13892 output_asm_insn ("bx%?\t%0", operands);
13893 else
13894 output_asm_insn ("mov%?\t%|pc, %0", operands);
13895
13896 return "";
13897 }
13898
13899 /* Output a 'call' insn that is a reference in memory. This is
13900 disabled for ARMv5, where we prefer to use blx instead, because otherwise
13901 there's a significant performance overhead. */
13902 const char *
13903 output_call_mem (rtx *operands)
13904 {
13905 gcc_assert (!arm_arch5);
13906 if (TARGET_INTERWORK)
13907 {
13908 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13909 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13910 output_asm_insn ("bx%?\t%|ip", operands);
13911 }
13912 else if (regno_use_in (LR_REGNUM, operands[0]))
13913 {
13914 /* LR is used in the memory address. We load the address in the
13915 first instruction. It's safe to use IP as the target of the
13916 load since the call will kill it anyway. */
13917 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13918 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13919 if (arm_arch4t)
13920 output_asm_insn ("bx%?\t%|ip", operands);
13921 else
13922 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
13923 }
13924 else
13925 {
13926 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13927 output_asm_insn ("ldr%?\t%|pc, %0", operands);
13928 }
13929
13930 return "";
13931 }
13932
13933
13934 /* Output a move from ARM registers to ARM registers of a long double.
13935 OPERANDS[0] is the destination.
13936 OPERANDS[1] is the source. */
13937 const char *
13938 output_mov_long_double_arm_from_arm (rtx *operands)
13939 {
13940 /* We have to be careful here because the two might overlap. */
13941 int dest_start = REGNO (operands[0]);
13942 int src_start = REGNO (operands[1]);
13943 rtx ops[2];
13944 int i;
13945
13946 if (dest_start < src_start)
13947 {
13948 for (i = 0; i < 3; i++)
13949 {
13950 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13951 ops[1] = gen_rtx_REG (SImode, src_start + i);
13952 output_asm_insn ("mov%?\t%0, %1", ops);
13953 }
13954 }
13955 else
13956 {
13957 for (i = 2; i >= 0; i--)
13958 {
13959 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13960 ops[1] = gen_rtx_REG (SImode, src_start + i);
13961 output_asm_insn ("mov%?\t%0, %1", ops);
13962 }
13963 }
13964
13965 return "";
13966 }
13967
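/* Move SRC into DEST as a pair of SETs: for an immediate, a move of the
   low 16 bits followed, if needed, by an insertion of the high 16 bits;
   otherwise a HIGH/LO_SUM pair.  */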
13968 void
13969 arm_emit_movpair (rtx dest, rtx src)
13970 {
13971 /* If the src is an immediate, simplify it. */
13972 if (CONST_INT_P (src))
13973 {
13974 HOST_WIDE_INT val = INTVAL (src);
13975 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
13976 if ((val >> 16) & 0x0000ffff)
13977 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
13978 GEN_INT (16)),
13979 GEN_INT ((val >> 16) & 0x0000ffff));
13980 return;
13981 }
13982 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
13983 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
13984 }
13985
13986 /* Output a move between double words. It must be REG<-MEM
13987 or MEM<-REG. */
13988 const char *
13989 output_move_double (rtx *operands, bool emit, int *count)
13990 {
13991 enum rtx_code code0 = GET_CODE (operands[0]);
13992 enum rtx_code code1 = GET_CODE (operands[1]);
13993 rtx otherops[3];
13994 if (count)
13995 *count = 1;
13996
13997 /* The only case when this might happen is when
13998 you are looking at the length of a DImode instruction
13999 that has an invalid constant in it. */
14000 if (code0 == REG && code1 != MEM)
14001 {
14002 gcc_assert (!emit);
14003 *count = 2;
14004 return "";
14005 }
14006
14007 if (code0 == REG)
14008 {
14009 unsigned int reg0 = REGNO (operands[0]);
14010
14011 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
14012
14013 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
14014
14015 switch (GET_CODE (XEXP (operands[1], 0)))
14016 {
14017 case REG:
14018
14019 if (emit)
14020 {
14021 if (TARGET_LDRD
14022 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
14023 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
14024 else
14025 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14026 }
14027 break;
14028
14029 case PRE_INC:
14030 gcc_assert (TARGET_LDRD);
14031 if (emit)
14032 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
14033 break;
14034
14035 case PRE_DEC:
14036 if (emit)
14037 {
14038 if (TARGET_LDRD)
14039 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
14040 else
14041 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
14042 }
14043 break;
14044
14045 case POST_INC:
14046 if (emit)
14047 {
14048 if (TARGET_LDRD)
14049 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
14050 else
14051 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
14052 }
14053 break;
14054
14055 case POST_DEC:
14056 gcc_assert (TARGET_LDRD);
14057 if (emit)
14058 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
14059 break;
14060
14061 case PRE_MODIFY:
14062 case POST_MODIFY:
14063 /* Autoincrement addressing modes should never have overlapping
14064 base and destination registers, and overlapping index registers
14065 are already prohibited, so this doesn't need to worry about
14066 fix_cm3_ldrd. */
14067 otherops[0] = operands[0];
14068 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
14069 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
14070
14071 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
14072 {
14073 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
14074 {
14075 /* Registers overlap so split out the increment. */
14076 if (emit)
14077 {
14078 output_asm_insn ("add%?\t%1, %1, %2", otherops);
14079 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
14080 }
14081 if (count)
14082 *count = 2;
14083 }
14084 else
14085 {
14086 /* Use a single insn if we can.
14087 FIXME: IWMMXT allows offsets larger than ldrd can
14088 handle, fix these up with a pair of ldr. */
14089 if (TARGET_THUMB2
14090 || !CONST_INT_P (otherops[2])
14091 || (INTVAL (otherops[2]) > -256
14092 && INTVAL (otherops[2]) < 256))
14093 {
14094 if (emit)
14095 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
14096 }
14097 else
14098 {
14099 if (emit)
14100 {
14101 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
14102 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14103 }
14104 if (count)
14105 *count = 2;
14106
14107 }
14108 }
14109 }
14110 else
14111 {
14112 /* Use a single insn if we can.
14113 FIXME: IWMMXT allows offsets larger than ldrd can handle,
14114 fix these up with a pair of ldr. */
14115 if (TARGET_THUMB2
14116 || !CONST_INT_P (otherops[2])
14117 || (INTVAL (otherops[2]) > -256
14118 && INTVAL (otherops[2]) < 256))
14119 {
14120 if (emit)
14121 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
14122 }
14123 else
14124 {
14125 if (emit)
14126 {
14127 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14128 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
14129 }
14130 if (count)
14131 *count = 2;
14132 }
14133 }
14134 break;
14135
14136 case LABEL_REF:
14137 case CONST:
14138 /* We might be able to use ldrd %0, %1 here. However, the range is
14139 different from that of ldr/adr, and it is broken on some ARMv7-M
14140 implementations. */
14141 /* Use the second register of the pair to avoid problematic
14142 overlap. */
14143 otherops[1] = operands[1];
14144 if (emit)
14145 output_asm_insn ("adr%?\t%0, %1", otherops);
14146 operands[1] = otherops[0];
14147 if (emit)
14148 {
14149 if (TARGET_LDRD)
14150 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14151 else
14152 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
14153 }
14154
14155 if (count)
14156 *count = 2;
14157 break;
14158
14159 /* ??? This needs checking for thumb2. */
14160 default:
14161 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
14162 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
14163 {
14164 otherops[0] = operands[0];
14165 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
14166 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
14167
14168 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
14169 {
14170 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
14171 {
14172 switch ((int) INTVAL (otherops[2]))
14173 {
14174 case -8:
14175 if (emit)
14176 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
14177 return "";
14178 case -4:
14179 if (TARGET_THUMB2)
14180 break;
14181 if (emit)
14182 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
14183 return "";
14184 case 4:
14185 if (TARGET_THUMB2)
14186 break;
14187 if (emit)
14188 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
14189 return "";
14190 }
14191 }
14192 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
14193 operands[1] = otherops[0];
14194 if (TARGET_LDRD
14195 && (REG_P (otherops[2])
14196 || TARGET_THUMB2
14197 || (CONST_INT_P (otherops[2])
14198 && INTVAL (otherops[2]) > -256
14199 && INTVAL (otherops[2]) < 256)))
14200 {
14201 if (reg_overlap_mentioned_p (operands[0],
14202 otherops[2]))
14203 {
14204 rtx tmp;
14205 /* Swap base and index registers over to
14206 avoid a conflict. */
14207 tmp = otherops[1];
14208 otherops[1] = otherops[2];
14209 otherops[2] = tmp;
14210 }
14211 /* If both registers conflict, it will usually
14212 have been fixed by a splitter. */
14213 if (reg_overlap_mentioned_p (operands[0], otherops[2])
14214 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
14215 {
14216 if (emit)
14217 {
14218 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14219 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14220 }
14221 if (count)
14222 *count = 2;
14223 }
14224 else
14225 {
14226 otherops[0] = operands[0];
14227 if (emit)
14228 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
14229 }
14230 return "";
14231 }
14232
14233 if (CONST_INT_P (otherops[2]))
14234 {
14235 if (emit)
14236 {
14237 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
14238 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
14239 else
14240 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14241 }
14242 }
14243 else
14244 {
14245 if (emit)
14246 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14247 }
14248 }
14249 else
14250 {
14251 if (emit)
14252 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
14253 }
14254
14255 if (count)
14256 *count = 2;
14257
14258 if (TARGET_LDRD)
14259 return "ldr%(d%)\t%0, [%1]";
14260
14261 return "ldm%(ia%)\t%1, %M0";
14262 }
14263 else
14264 {
14265 otherops[1] = adjust_address (operands[1], SImode, 4);
14266 /* Take care of overlapping base/data reg. */
14267 if (reg_mentioned_p (operands[0], operands[1]))
14268 {
14269 if (emit)
14270 {
14271 output_asm_insn ("ldr%?\t%0, %1", otherops);
14272 output_asm_insn ("ldr%?\t%0, %1", operands);
14273 }
14274 if (count)
14275 *count = 2;
14276
14277 }
14278 else
14279 {
14280 if (emit)
14281 {
14282 output_asm_insn ("ldr%?\t%0, %1", operands);
14283 output_asm_insn ("ldr%?\t%0, %1", otherops);
14284 }
14285 if (count)
14286 *count = 2;
14287 }
14288 }
14289 }
14290 }
14291 else
14292 {
14293 /* Constraints should ensure this. */
14294 gcc_assert (code0 == MEM && code1 == REG);
14295 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
14296
14297 switch (GET_CODE (XEXP (operands[0], 0)))
14298 {
14299 case REG:
14300 if (emit)
14301 {
14302 if (TARGET_LDRD)
14303 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
14304 else
14305 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14306 }
14307 break;
14308
14309 case PRE_INC:
14310 gcc_assert (TARGET_LDRD);
14311 if (emit)
14312 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
14313 break;
14314
14315 case PRE_DEC:
14316 if (emit)
14317 {
14318 if (TARGET_LDRD)
14319 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
14320 else
14321 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
14322 }
14323 break;
14324
14325 case POST_INC:
14326 if (emit)
14327 {
14328 if (TARGET_LDRD)
14329 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
14330 else
14331 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
14332 }
14333 break;
14334
14335 case POST_DEC:
14336 gcc_assert (TARGET_LDRD);
14337 if (emit)
14338 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
14339 break;
14340
14341 case PRE_MODIFY:
14342 case POST_MODIFY:
14343 otherops[0] = operands[1];
14344 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
14345 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
14346
14347 /* IWMMXT allows offsets larger than ldrd can handle,
14348 fix these up with a pair of ldr. */
14349 if (!TARGET_THUMB2
14350 && CONST_INT_P (otherops[2])
14351 && (INTVAL(otherops[2]) <= -256
14352 || INTVAL(otherops[2]) >= 256))
14353 {
14354 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14355 {
14356 if (emit)
14357 {
14358 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
14359 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14360 }
14361 if (count)
14362 *count = 2;
14363 }
14364 else
14365 {
14366 if (emit)
14367 {
14368 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14369 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
14370 }
14371 if (count)
14372 *count = 2;
14373 }
14374 }
14375 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14376 {
14377 if (emit)
14378 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
14379 }
14380 else
14381 {
14382 if (emit)
14383 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
14384 }
14385 break;
14386
14387 case PLUS:
14388 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
14389 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
14390 {
14391 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
14392 {
14393 case -8:
14394 if (emit)
14395 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
14396 return "";
14397
14398 case -4:
14399 if (TARGET_THUMB2)
14400 break;
14401 if (emit)
14402 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
14403 return "";
14404
14405 case 4:
14406 if (TARGET_THUMB2)
14407 break;
14408 if (emit)
14409 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
14410 return "";
14411 }
14412 }
14413 if (TARGET_LDRD
14414 && (REG_P (otherops[2])
14415 || TARGET_THUMB2
14416 || (CONST_INT_P (otherops[2])
14417 && INTVAL (otherops[2]) > -256
14418 && INTVAL (otherops[2]) < 256)))
14419 {
14420 otherops[0] = operands[1];
14421 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
14422 if (emit)
14423 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
14424 return "";
14425 }
14426 /* Fall through */
14427
14428 default:
14429 otherops[0] = adjust_address (operands[0], SImode, 4);
14430 otherops[1] = operands[1];
14431 if (emit)
14432 {
14433 output_asm_insn ("str%?\t%1, %0", operands);
14434 output_asm_insn ("str%?\t%H1, %0", otherops);
14435 }
14436 if (count)
14437 *count = 2;
14438 }
14439 }
14440
14441 return "";
14442 }
14443
14444 /* Output a move, load or store for quad-word vectors in ARM registers. Only
14445 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
14446
14447 const char *
14448 output_move_quad (rtx *operands)
14449 {
14450 if (REG_P (operands[0]))
14451 {
14452 /* Load, or reg->reg move. */
14453
14454 if (MEM_P (operands[1]))
14455 {
14456 switch (GET_CODE (XEXP (operands[1], 0)))
14457 {
14458 case REG:
14459 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14460 break;
14461
14462 case LABEL_REF:
14463 case CONST:
14464 output_asm_insn ("adr%?\t%0, %1", operands);
14465 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
14466 break;
14467
14468 default:
14469 gcc_unreachable ();
14470 }
14471 }
14472 else
14473 {
14474 rtx ops[2];
14475 int dest, src, i;
14476
14477 gcc_assert (REG_P (operands[1]));
14478
14479 dest = REGNO (operands[0]);
14480 src = REGNO (operands[1]);
14481
14482 /* This seems pretty dumb, but hopefully GCC won't try to do it
14483 very often. */
14484 if (dest < src)
14485 for (i = 0; i < 4; i++)
14486 {
14487 ops[0] = gen_rtx_REG (SImode, dest + i);
14488 ops[1] = gen_rtx_REG (SImode, src + i);
14489 output_asm_insn ("mov%?\t%0, %1", ops);
14490 }
14491 else
14492 for (i = 3; i >= 0; i--)
14493 {
14494 ops[0] = gen_rtx_REG (SImode, dest + i);
14495 ops[1] = gen_rtx_REG (SImode, src + i);
14496 output_asm_insn ("mov%?\t%0, %1", ops);
14497 }
14498 }
14499 }
14500 else
14501 {
14502 gcc_assert (MEM_P (operands[0]));
14503 gcc_assert (REG_P (operands[1]));
14504 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
14505
14506 switch (GET_CODE (XEXP (operands[0], 0)))
14507 {
14508 case REG:
14509 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14510 break;
14511
14512 default:
14513 gcc_unreachable ();
14514 }
14515 }
14516
14517 return "";
14518 }
14519
14520 /* Output a VFP load or store instruction. */
14521
14522 const char *
14523 output_move_vfp (rtx *operands)
14524 {
14525 rtx reg, mem, addr, ops[2];
14526 int load = REG_P (operands[0]);
14527 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
14528 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
14529 const char *templ;
14530 char buff[50];
14531 enum machine_mode mode;
14532
14533 reg = operands[!load];
14534 mem = operands[load];
14535
14536 mode = GET_MODE (reg);
14537
14538 gcc_assert (REG_P (reg));
14539 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
14540 gcc_assert (mode == SFmode
14541 || mode == DFmode
14542 || mode == SImode
14543 || mode == DImode
14544 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
14545 gcc_assert (MEM_P (mem));
14546
14547 addr = XEXP (mem, 0);
14548
14549 switch (GET_CODE (addr))
14550 {
14551 case PRE_DEC:
14552 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
14553 ops[0] = XEXP (addr, 0);
14554 ops[1] = reg;
14555 break;
14556
14557 case POST_INC:
14558 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
14559 ops[0] = XEXP (addr, 0);
14560 ops[1] = reg;
14561 break;
14562
14563 default:
14564 templ = "f%s%c%%?\t%%%s0, %%1%s";
14565 ops[0] = reg;
14566 ops[1] = mem;
14567 break;
14568 }
14569
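  /* Fill in the template: load or store, single or double precision, the
     operand prefix, and a trailing comment marking integer values held in
     VFP registers.  */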
14570 sprintf (buff, templ,
14571 load ? "ld" : "st",
14572 dp ? 'd' : 's',
14573 dp ? "P" : "",
14574 integer_p ? "\t%@ int" : "");
14575 output_asm_insn (buff, ops);
14576
14577 return "";
14578 }
14579
14580 /* Output a Neon double-word or quad-word load or store, or a load
14581 or store for larger structure modes.
14582
14583 WARNING: The ordering of elements is weird in big-endian mode,
14584 because the EABI requires that vectors stored in memory appear
14585 as though they were stored by a VSTM instruction.
14586 GCC RTL defines element ordering based on in-memory order.
14587 This can be different from the architectural ordering of elements
14588 within a NEON register. The intrinsics defined in arm_neon.h use the
14589 NEON register element ordering, not the GCC RTL element ordering.
14590
14591 For example, the in-memory ordering of a big-endian quadword
14592 vector with 16-bit elements when stored from register pair {d0,d1}
14593 will be (lowest address first, d0[N] is NEON register element N):
14594
14595 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
14596
14597 When necessary, quadword registers (dN, dN+1) are moved to ARM
14598 registers from rN in the order:
14599
14600 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
14601
14602 So that STM/LDM can be used on vectors in ARM registers, and the
14603 same memory layout will result as if VSTM/VLDM were used.
14604
14605 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
14606 possible, which allows use of appropriate alignment tags.
14607 Note that the choice of "64" is independent of the actual vector
14608 element size; this size simply ensures that the behavior is
14609 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
14610
14611 Due to limitations of those instructions, use of VST1.64/VLD1.64
14612 is not possible if:
14613 - the address contains PRE_DEC, or
14614 - the mode refers to more than 4 double-word registers
14615
14616 In those cases, it would be possible to replace VSTM/VLDM by a
14617 sequence of instructions; this is not currently implemented since
14618 this is not certain to actually improve performance. */
14619
14620 const char *
14621 output_move_neon (rtx *operands)
14622 {
14623 rtx reg, mem, addr, ops[2];
14624 int regno, nregs, load = REG_P (operands[0]);
14625 const char *templ;
14626 char buff[50];
14627 enum machine_mode mode;
14628
14629 reg = operands[!load];
14630 mem = operands[load];
14631
14632 mode = GET_MODE (reg);
14633
14634 gcc_assert (REG_P (reg));
14635 regno = REGNO (reg);
14636 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
14637 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
14638 || NEON_REGNO_OK_FOR_QUAD (regno));
14639 gcc_assert (VALID_NEON_DREG_MODE (mode)
14640 || VALID_NEON_QREG_MODE (mode)
14641 || VALID_NEON_STRUCT_MODE (mode));
14642 gcc_assert (MEM_P (mem));
14643
14644 addr = XEXP (mem, 0);
14645
14646 /* Strip off const from addresses like (const (plus (...))). */
14647 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14648 addr = XEXP (addr, 0);
14649
14650 switch (GET_CODE (addr))
14651 {
14652 case POST_INC:
14653 /* We have to use vldm / vstm for too-large modes. */
14654 if (nregs > 4)
14655 {
14656 templ = "v%smia%%?\t%%0!, %%h1";
14657 ops[0] = XEXP (addr, 0);
14658 }
14659 else
14660 {
14661 templ = "v%s1.64\t%%h1, %%A0";
14662 ops[0] = mem;
14663 }
14664 ops[1] = reg;
14665 break;
14666
14667 case PRE_DEC:
14668 /* We have to use vldm / vstm in this case, since there is no
14669 pre-decrement form of the vld1 / vst1 instructions. */
14670 templ = "v%smdb%%?\t%%0!, %%h1";
14671 ops[0] = XEXP (addr, 0);
14672 ops[1] = reg;
14673 break;
14674
14675 case POST_MODIFY:
14676 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
14677 gcc_unreachable ();
14678
14679 case LABEL_REF:
14680 case PLUS:
14681 {
14682 int i;
14683 int overlap = -1;
14684 for (i = 0; i < nregs; i++)
14685 {
14686 /* We're only using DImode here because it's a convenient size. */
14687 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
14688 ops[1] = adjust_address (mem, DImode, 8 * i);
14689 if (reg_overlap_mentioned_p (ops[0], mem))
14690 {
14691 gcc_assert (overlap == -1);
14692 overlap = i;
14693 }
14694 else
14695 {
14696 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14697 output_asm_insn (buff, ops);
14698 }
14699 }
14700 if (overlap != -1)
14701 {
14702 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
14703 ops[1] = adjust_address (mem, SImode, 8 * overlap);
14704 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14705 output_asm_insn (buff, ops);
14706 }
14707
14708 return "";
14709 }
14710
14711 default:
14712 /* We have to use vldm / vstm for too-large modes. */
14713 if (nregs > 4)
14714 templ = "v%smia%%?\t%%m0, %%h1";
14715 else
14716 templ = "v%s1.64\t%%h1, %%A0";
14717
14718 ops[0] = mem;
14719 ops[1] = reg;
14720 }
14721
14722 sprintf (buff, templ, load ? "ld" : "st");
14723 output_asm_insn (buff, ops);
14724
14725 return "";
14726 }
14727
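/* Editor's note: an illustrative sketch of the assembly output_move_neon
   emits for a quad-word (two D-register) load, assuming (hypothetically)
   that the value lives in d16/d17 and the base address in r0; alignment
   annotations are omitted:

     (post_inc r0)            ->  vld1.64  {d16-d17}, [r0]!
     (pre_dec r0)             ->  vldmdb   r0!, {d16-d17}
     (plus r0 (const_int 8))  ->  vldr     d16, [r0, #8]
                                  vldr     d17, [r0, #16]
     plain (reg r0)           ->  vld1.64  {d16-d17}, [r0]

   Stores are the same shapes with "vst"/"vstr" substituted for
   "vld"/"vldr".  */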
14728 /* Compute and return the length of neon_mov<mode>, where <mode> is
14729 one of VSTRUCT modes: EI, OI, CI or XI. */
14730 int
14731 arm_attr_length_move_neon (rtx insn)
14732 {
14733 rtx reg, mem, addr;
14734 int load;
14735 enum machine_mode mode;
14736
14737 extract_insn_cached (insn);
14738
14739 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
14740 {
14741 mode = GET_MODE (recog_data.operand[0]);
14742 switch (mode)
14743 {
14744 case EImode:
14745 case OImode:
14746 return 8;
14747 case CImode:
14748 return 12;
14749 case XImode:
14750 return 16;
14751 default:
14752 gcc_unreachable ();
14753 }
14754 }
14755
14756 load = REG_P (recog_data.operand[0]);
14757 reg = recog_data.operand[!load];
14758 mem = recog_data.operand[load];
14759
14760 gcc_assert (MEM_P (mem));
14761
14762 mode = GET_MODE (reg);
14763 addr = XEXP (mem, 0);
14764
14765 /* Strip off const from addresses like (const (plus (...))). */
14766 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14767 addr = XEXP (addr, 0);
14768
14769 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
14770 {
14771 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14772 return insns * 4;
14773 }
14774 else
14775 return 4;
14776 }
14777
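/* Editor's note: a worked example of the length computation above, purely
   illustrative.  A register-to-register move of an OImode (4 D-register)
   value is split into two moves, hence 8 bytes.  An OImode load from a
   reg+offset address is emitted by output_move_neon as one vldr per
   D register, i.e. 4 * 4 = 16 bytes.  Any other address form is a single
   4-byte vldm/vld1 instruction.  */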
14778 /* Return nonzero if the offset in the address is an immediate. Otherwise,
14779 return zero. */
14780
14781 int
14782 arm_address_offset_is_imm (rtx insn)
14783 {
14784 rtx mem, addr;
14785
14786 extract_insn_cached (insn);
14787
14788 if (REG_P (recog_data.operand[0]))
14789 return 0;
14790
14791 mem = recog_data.operand[0];
14792
14793 gcc_assert (MEM_P (mem));
14794
14795 addr = XEXP (mem, 0);
14796
14797 if (REG_P (addr)
14798 || (GET_CODE (addr) == PLUS
14799 && REG_P (XEXP (addr, 0))
14800 && CONST_INT_P (XEXP (addr, 1))))
14801 return 1;
14802 else
14803 return 0;
14804 }
14805
14806 /* Output an ADD r, s, #n where n may be too big for one instruction.
14807 If adding zero to one register, output nothing. */
14808 const char *
14809 output_add_immediate (rtx *operands)
14810 {
14811 HOST_WIDE_INT n = INTVAL (operands[2]);
14812
14813 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
14814 {
14815 if (n < 0)
14816 output_multi_immediate (operands,
14817 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
14818 -n);
14819 else
14820 output_multi_immediate (operands,
14821 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
14822 n);
14823 }
14824
14825 return "";
14826 }
14827
14828 /* Output a multiple immediate operation.
14829 OPERANDS is the vector of operands referred to in the output patterns.
14830 INSTR1 is the output pattern to use for the first constant.
14831 INSTR2 is the output pattern to use for subsequent constants.
14832 IMMED_OP is the index of the constant slot in OPERANDS.
14833 N is the constant value. */
14834 static const char *
14835 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
14836 int immed_op, HOST_WIDE_INT n)
14837 {
14838 #if HOST_BITS_PER_WIDE_INT > 32
14839 n &= 0xffffffff;
14840 #endif
14841
14842 if (n == 0)
14843 {
14844 /* Quick and easy output. */
14845 operands[immed_op] = const0_rtx;
14846 output_asm_insn (instr1, operands);
14847 }
14848 else
14849 {
14850 int i;
14851 const char * instr = instr1;
14852
14853 /* Note that n is never zero here (which would give no output). */
14854 for (i = 0; i < 32; i += 2)
14855 {
14856 if (n & (3 << i))
14857 {
14858 operands[immed_op] = GEN_INT (n & (255 << i));
14859 output_asm_insn (instr, operands);
14860 instr = instr2;
14861 i += 6;
14862 }
14863 }
14864 }
14865
14866 return "";
14867 }
14868
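/* Editor's note: a self-contained sketch (guarded out, not part of the
   build) of the chunking scheme used by output_multi_immediate above.
   Each emitted instruction covers an 8-bit field starting at an even bit
   position, which is what a single ARM add/sub immediate can encode.
   For example, n = 0x00f000f5 is split into the two chunks 0xf5 and
   0x00f00000, i.e. two instructions.  The helper name is hypothetical.  */
#if 0
static int
example_count_immediate_chunks (unsigned int n)
{
  int i, count = 0;

  for (i = 0; i < 32; i += 2)
    if (n & (3u << i))
      {
	/* One instruction covers bits i .. i+7 of N.  */
	count++;
	/* Together with the loop increment, skip past the chunk just
	   counted.  */
	i += 6;
      }

  return count;
}
#endif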
14869 /* Return the name of a shifter operation. */
14870 static const char *
14871 arm_shift_nmem(enum rtx_code code)
14872 {
14873 switch (code)
14874 {
14875 case ASHIFT:
14876 return ARM_LSL_NAME;
14877
14878 case ASHIFTRT:
14879 return "asr";
14880
14881 case LSHIFTRT:
14882 return "lsr";
14883
14884 case ROTATERT:
14885 return "ror";
14886
14887 default:
14888 abort();
14889 }
14890 }
14891
14892 /* Return the appropriate ARM instruction for the operation code.
14893 The returned result should not be overwritten. OP is the rtx of the
14894 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
14895 was shifted. */
14896 const char *
14897 arithmetic_instr (rtx op, int shift_first_arg)
14898 {
14899 switch (GET_CODE (op))
14900 {
14901 case PLUS:
14902 return "add";
14903
14904 case MINUS:
14905 return shift_first_arg ? "rsb" : "sub";
14906
14907 case IOR:
14908 return "orr";
14909
14910 case XOR:
14911 return "eor";
14912
14913 case AND:
14914 return "and";
14915
14916 case ASHIFT:
14917 case ASHIFTRT:
14918 case LSHIFTRT:
14919 case ROTATERT:
14920 return arm_shift_nmem(GET_CODE(op));
14921
14922 default:
14923 gcc_unreachable ();
14924 }
14925 }
14926
14927 /* Ensure valid constant shifts and return the appropriate shift mnemonic
14928 for the operation code. The returned result should not be overwritten.
14929 OP is the rtx code of the shift.
14930    On exit, *AMOUNTP will be -1 if the shift is by a register, or the
14931    constant amount if the shift is by a constant.  */
14932 static const char *
14933 shift_op (rtx op, HOST_WIDE_INT *amountp)
14934 {
14935 const char * mnem;
14936 enum rtx_code code = GET_CODE (op);
14937
14938 switch (GET_CODE (XEXP (op, 1)))
14939 {
14940 case REG:
14941 case SUBREG:
14942 *amountp = -1;
14943 break;
14944
14945 case CONST_INT:
14946 *amountp = INTVAL (XEXP (op, 1));
14947 break;
14948
14949 default:
14950 gcc_unreachable ();
14951 }
14952
14953 switch (code)
14954 {
14955 case ROTATE:
14956 gcc_assert (*amountp != -1);
14957 *amountp = 32 - *amountp;
14958 code = ROTATERT;
14959
14960 /* Fall through. */
14961
14962 case ASHIFT:
14963 case ASHIFTRT:
14964 case LSHIFTRT:
14965 case ROTATERT:
14966 mnem = arm_shift_nmem(code);
14967 break;
14968
14969 case MULT:
14970 /* We never have to worry about the amount being other than a
14971 power of 2, since this case can never be reloaded from a reg. */
14972 gcc_assert (*amountp != -1);
14973 *amountp = int_log2 (*amountp);
14974 return ARM_LSL_NAME;
14975
14976 default:
14977 gcc_unreachable ();
14978 }
14979
14980 if (*amountp != -1)
14981 {
14982 /* This is not 100% correct, but follows from the desire to merge
14983 multiplication by a power of 2 with the recognizer for a
14984 shift. >=32 is not a valid shift for "lsl", so we must try and
14985 output a shift that produces the correct arithmetical result.
14986 Using lsr #32 is identical except for the fact that the carry bit
14987 is not set correctly if we set the flags; but we never use the
14988 carry bit from such an operation, so we can ignore that. */
14989 if (code == ROTATERT)
14990 /* Rotate is just modulo 32. */
14991 *amountp &= 31;
14992 else if (*amountp != (*amountp & 31))
14993 {
14994 if (code == ASHIFT)
14995 mnem = "lsr";
14996 *amountp = 32;
14997 }
14998
14999 /* Shifts of 0 are no-ops. */
15000 if (*amountp == 0)
15001 return NULL;
15002 }
15003
15004 return mnem;
15005 }
15006
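/* Editor's note: illustrative examples of what shift_op above returns for
   a few shift RTXs (the register operands are hypothetical):

     (ashiftrt r1 (const_int 2))  ->  "asr", *AMOUNTP = 2
     (mult     r1 (const_int 8))  ->  "lsl", *AMOUNTP = 3
     (lshiftrt r1 r2)             ->  "lsr", *AMOUNTP = -1
     (ashift   r1 (const_int 0))  ->  NULL  (a shift by zero is a no-op)  */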
15007 /* Return the base-2 logarithm of POWER, which must be a power of two.  */
15008
15009 static HOST_WIDE_INT
15010 int_log2 (HOST_WIDE_INT power)
15011 {
15012 HOST_WIDE_INT shift = 0;
15013
15014 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
15015 {
15016 gcc_assert (shift <= 31);
15017 shift++;
15018 }
15019
15020 return shift;
15021 }
15022
15023 /* Output a .ascii pseudo-op, keeping track of lengths. This is
15024 because /bin/as is horribly restrictive. The judgement about
15025 whether or not each character is 'printable' (and can be output as
15026 is) or not (and must be printed with an octal escape) must be made
15027 with reference to the *host* character set -- the situation is
15028 similar to that discussed in the comments above pp_c_char in
15029 c-pretty-print.c. */
15030
15031 #define MAX_ASCII_LEN 51
15032
15033 void
15034 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
15035 {
15036 int i;
15037 int len_so_far = 0;
15038
15039 fputs ("\t.ascii\t\"", stream);
15040
15041 for (i = 0; i < len; i++)
15042 {
15043 int c = p[i];
15044
15045 if (len_so_far >= MAX_ASCII_LEN)
15046 {
15047 fputs ("\"\n\t.ascii\t\"", stream);
15048 len_so_far = 0;
15049 }
15050
15051 if (ISPRINT (c))
15052 {
15053 if (c == '\\' || c == '\"')
15054 {
15055 putc ('\\', stream);
15056 len_so_far++;
15057 }
15058 putc (c, stream);
15059 len_so_far++;
15060 }
15061 else
15062 {
15063 fprintf (stream, "\\%03o", c);
15064 len_so_far += 4;
15065 }
15066 }
15067
15068 fputs ("\"\n", stream);
15069 }
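/* Editor's note: a worked example of the escaping above.  For the
   (hypothetical) 4-byte input { 'a', '"', 0x07, 'b' } the function emits

	.ascii	"a\"\007b"

   The quote costs two characters of output, the non-printable byte four,
   and once roughly MAX_ASCII_LEN (51) characters have been emitted a new
   .ascii directive is started.  */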
15070 \f
15071 /* Compute the register save mask for registers 0 through 12
15072 inclusive. This code is used by arm_compute_save_reg_mask. */
15073
15074 static unsigned long
15075 arm_compute_save_reg0_reg12_mask (void)
15076 {
15077 unsigned long func_type = arm_current_func_type ();
15078 unsigned long save_reg_mask = 0;
15079 unsigned int reg;
15080
15081 if (IS_INTERRUPT (func_type))
15082 {
15083 unsigned int max_reg;
15084 /* Interrupt functions must not corrupt any registers,
15085 even call clobbered ones. If this is a leaf function
15086 we can just examine the registers used by the RTL, but
15087 otherwise we have to assume that whatever function is
15088 called might clobber anything, and so we have to save
15089 all the call-clobbered registers as well. */
15090 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
15091 /* FIQ handlers have registers r8 - r12 banked, so
15092         we only need to check r0 - r7.  Normal ISRs only
15093         bank r14 and r15, so we must check up to r12.
15094 r13 is the stack pointer which is always preserved,
15095 so we do not need to consider it here. */
15096 max_reg = 7;
15097 else
15098 max_reg = 12;
15099
15100 for (reg = 0; reg <= max_reg; reg++)
15101 if (df_regs_ever_live_p (reg)
15102 || (! crtl->is_leaf && call_used_regs[reg]))
15103 save_reg_mask |= (1 << reg);
15104
15105 /* Also save the pic base register if necessary. */
15106 if (flag_pic
15107 && !TARGET_SINGLE_PIC_BASE
15108 && arm_pic_register != INVALID_REGNUM
15109 && crtl->uses_pic_offset_table)
15110 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15111 }
15112 else if (IS_VOLATILE(func_type))
15113 {
15114 /* For noreturn functions we historically omitted register saves
15115 altogether. However this really messes up debugging. As a
15116 compromise save just the frame pointers. Combined with the link
15117 register saved elsewhere this should be sufficient to get
15118 a backtrace. */
15119 if (frame_pointer_needed)
15120 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15121 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
15122 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15123 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
15124 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
15125 }
15126 else
15127 {
15128 /* In the normal case we only need to save those registers
15129 which are call saved and which are used by this function. */
15130 for (reg = 0; reg <= 11; reg++)
15131 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15132 save_reg_mask |= (1 << reg);
15133
15134 /* Handle the frame pointer as a special case. */
15135 if (frame_pointer_needed)
15136 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15137
15138 /* If we aren't loading the PIC register,
15139 don't stack it even though it may be live. */
15140 if (flag_pic
15141 && !TARGET_SINGLE_PIC_BASE
15142 && arm_pic_register != INVALID_REGNUM
15143 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
15144 || crtl->uses_pic_offset_table))
15145 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15146
15147 /* The prologue will copy SP into R0, so save it. */
15148 if (IS_STACKALIGN (func_type))
15149 save_reg_mask |= 1;
15150 }
15151
15152 /* Save registers so the exception handler can modify them. */
15153 if (crtl->calls_eh_return)
15154 {
15155 unsigned int i;
15156
15157 for (i = 0; ; i++)
15158 {
15159 reg = EH_RETURN_DATA_REGNO (i);
15160 if (reg == INVALID_REGNUM)
15161 break;
15162 save_reg_mask |= 1 << reg;
15163 }
15164 }
15165
15166 return save_reg_mask;
15167 }
15168
15169
15170 /* Compute the number of bytes used to store the static chain register on the
15171 stack, above the stack frame. We need to know this accurately to get the
15172 alignment of the rest of the stack frame correct. */
15173
15174 static int arm_compute_static_chain_stack_bytes (void)
15175 {
15176 unsigned long func_type = arm_current_func_type ();
15177 int static_chain_stack_bytes = 0;
15178
15179 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
15180 IS_NESTED (func_type) &&
15181 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
15182 static_chain_stack_bytes = 4;
15183
15184 return static_chain_stack_bytes;
15185 }
15186
15187
15188 /* Compute a bit mask of which registers need to be
15189 saved on the stack for the current function.
15190 This is used by arm_get_frame_offsets, which may add extra registers. */
15191
15192 static unsigned long
15193 arm_compute_save_reg_mask (void)
15194 {
15195 unsigned int save_reg_mask = 0;
15196 unsigned long func_type = arm_current_func_type ();
15197 unsigned int reg;
15198
15199 if (IS_NAKED (func_type))
15200 /* This should never really happen. */
15201 return 0;
15202
15203 /* If we are creating a stack frame, then we must save the frame pointer,
15204 IP (which will hold the old stack pointer), LR and the PC. */
15205 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15206 save_reg_mask |=
15207 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
15208 | (1 << IP_REGNUM)
15209 | (1 << LR_REGNUM)
15210 | (1 << PC_REGNUM);
15211
15212 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
15213
15214 /* Decide if we need to save the link register.
15215 Interrupt routines have their own banked link register,
15216 so they never need to save it.
15217 Otherwise if we do not use the link register we do not need to save
15218 it. If we are pushing other registers onto the stack however, we
15219 can save an instruction in the epilogue by pushing the link register
15220 now and then popping it back into the PC. This incurs extra memory
15221 accesses though, so we only do it when optimizing for size, and only
15222 if we know that we will not need a fancy return sequence. */
15223 if (df_regs_ever_live_p (LR_REGNUM)
15224 || (save_reg_mask
15225 && optimize_size
15226 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15227 && !crtl->calls_eh_return))
15228 save_reg_mask |= 1 << LR_REGNUM;
15229
15230 if (cfun->machine->lr_save_eliminated)
15231 save_reg_mask &= ~ (1 << LR_REGNUM);
15232
15233 if (TARGET_REALLY_IWMMXT
15234 && ((bit_count (save_reg_mask)
15235 + ARM_NUM_INTS (crtl->args.pretend_args_size +
15236 arm_compute_static_chain_stack_bytes())
15237 ) % 2) != 0)
15238 {
15239 /* The total number of registers that are going to be pushed
15240 onto the stack is odd. We need to ensure that the stack
15241 is 64-bit aligned before we start to save iWMMXt registers,
15242 and also before we start to create locals. (A local variable
15243 might be a double or long long which we will load/store using
15244 an iWMMXt instruction). Therefore we need to push another
15245 ARM register, so that the stack will be 64-bit aligned. We
15246         try to avoid using the arg registers (r0 - r3) as they might be
15247 used to pass values in a tail call. */
15248 for (reg = 4; reg <= 12; reg++)
15249 if ((save_reg_mask & (1 << reg)) == 0)
15250 break;
15251
15252 if (reg <= 12)
15253 save_reg_mask |= (1 << reg);
15254 else
15255 {
15256 cfun->machine->sibcall_blocked = 1;
15257 save_reg_mask |= (1 << 3);
15258 }
15259 }
15260
15261   /* We may need to push an additional register for use when initializing
15262      the PIC base register.  */
15263 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
15264 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
15265 {
15266 reg = thumb_find_work_register (1 << 4);
15267 if (!call_used_regs[reg])
15268 save_reg_mask |= (1 << reg);
15269 }
15270
15271 return save_reg_mask;
15272 }
15273
15274
15275 /* Compute a bit mask of which registers need to be
15276 saved on the stack for the current function. */
15277 static unsigned long
15278 thumb1_compute_save_reg_mask (void)
15279 {
15280 unsigned long mask;
15281 unsigned reg;
15282
15283 mask = 0;
15284 for (reg = 0; reg < 12; reg ++)
15285 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15286 mask |= 1 << reg;
15287
15288 if (flag_pic
15289 && !TARGET_SINGLE_PIC_BASE
15290 && arm_pic_register != INVALID_REGNUM
15291 && crtl->uses_pic_offset_table)
15292 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15293
15294 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
15295 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
15296 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15297
15298 /* LR will also be pushed if any lo regs are pushed. */
15299 if (mask & 0xff || thumb_force_lr_save ())
15300 mask |= (1 << LR_REGNUM);
15301
15302 /* Make sure we have a low work register if we need one.
15303 We will need one if we are going to push a high register,
15304 but we are not currently intending to push a low register. */
15305 if ((mask & 0xff) == 0
15306 && ((mask & 0x0f00) || TARGET_BACKTRACE))
15307 {
15308 /* Use thumb_find_work_register to choose which register
15309 we will use. If the register is live then we will
15310 have to push it. Use LAST_LO_REGNUM as our fallback
15311 choice for the register to select. */
15312 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
15313 /* Make sure the register returned by thumb_find_work_register is
15314 not part of the return value. */
15315 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
15316 reg = LAST_LO_REGNUM;
15317
15318 if (! call_used_regs[reg])
15319 mask |= 1 << reg;
15320 }
15321
15322 /* The 504 below is 8 bytes less than 512 because there are two possible
15323 alignment words. We can't tell here if they will be present or not so we
15324 have to play it safe and assume that they are. */
15325 if ((CALLER_INTERWORKING_SLOT_SIZE +
15326 ROUND_UP_WORD (get_frame_size ()) +
15327 crtl->outgoing_args_size) >= 504)
15328 {
15329 /* This is the same as the code in thumb1_expand_prologue() which
15330 determines which register to use for stack decrement. */
15331 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
15332 if (mask & (1 << reg))
15333 break;
15334
15335 if (reg > LAST_LO_REGNUM)
15336 {
15337 /* Make sure we have a register available for stack decrement. */
15338 mask |= 1 << LAST_LO_REGNUM;
15339 }
15340 }
15341
15342 return mask;
15343 }
15344
15345
15346 /* Return the number of bytes required to save VFP registers. */
15347 static int
15348 arm_get_vfp_saved_size (void)
15349 {
15350 unsigned int regno;
15351 int count;
15352 int saved;
15353
15354 saved = 0;
15355 /* Space for saved VFP registers. */
15356 if (TARGET_HARD_FLOAT && TARGET_VFP)
15357 {
15358 count = 0;
15359 for (regno = FIRST_VFP_REGNUM;
15360 regno < LAST_VFP_REGNUM;
15361 regno += 2)
15362 {
15363 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
15364 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
15365 {
15366 if (count > 0)
15367 {
15368 /* Workaround ARM10 VFPr1 bug. */
15369 if (count == 2 && !arm_arch6)
15370 count++;
15371 saved += count * 8;
15372 }
15373 count = 0;
15374 }
15375 else
15376 count++;
15377 }
15378 if (count > 0)
15379 {
15380 if (count == 2 && !arm_arch6)
15381 count++;
15382 saved += count * 8;
15383 }
15384 }
15385 return saved;
15386 }
15387
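/* Editor's note: a worked example for arm_get_vfp_saved_size above, with
   hypothetical liveness.  If d8-d11 and d14 need saving, the scan finds
   two contiguous runs: {d8-d11} (4 registers, 32 bytes) and {d14}
   (1 register, 8 bytes), giving 40 bytes in total.  Neither run here
   triggers the ARM10 VFPr1 workaround, which pads a 2-register run out
   to 3 registers on pre-v6 cores.  */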
15388
15389 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
15390    everything bar the final return instruction.  If SIMPLE_RETURN is true,
15391    then do not output the epilogue, because it has already been emitted in RTL.  */
15392 const char *
15393 output_return_instruction (rtx operand, bool really_return, bool reverse,
15394 bool simple_return)
15395 {
15396 char conditional[10];
15397 char instr[100];
15398 unsigned reg;
15399 unsigned long live_regs_mask;
15400 unsigned long func_type;
15401 arm_stack_offsets *offsets;
15402
15403 func_type = arm_current_func_type ();
15404
15405 if (IS_NAKED (func_type))
15406 return "";
15407
15408 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15409 {
15410 /* If this function was declared non-returning, and we have
15411 found a tail call, then we have to trust that the called
15412 function won't return. */
15413 if (really_return)
15414 {
15415 rtx ops[2];
15416
15417 /* Otherwise, trap an attempted return by aborting. */
15418 ops[0] = operand;
15419 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
15420 : "abort");
15421 assemble_external_libcall (ops[1]);
15422 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
15423 }
15424
15425 return "";
15426 }
15427
15428 gcc_assert (!cfun->calls_alloca || really_return);
15429
15430 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
15431
15432 cfun->machine->return_used_this_function = 1;
15433
15434 offsets = arm_get_frame_offsets ();
15435 live_regs_mask = offsets->saved_regs_mask;
15436
15437 if (!simple_return && live_regs_mask)
15438 {
15439 const char * return_reg;
15440
15441 /* If we do not have any special requirements for function exit
15442 (e.g. interworking) then we can load the return address
15443 directly into the PC. Otherwise we must load it into LR. */
15444 if (really_return
15445 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
15446 return_reg = reg_names[PC_REGNUM];
15447 else
15448 return_reg = reg_names[LR_REGNUM];
15449
15450 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
15451 {
15452 /* There are three possible reasons for the IP register
15453 being saved. 1) a stack frame was created, in which case
15454 IP contains the old stack pointer, or 2) an ISR routine
15455 corrupted it, or 3) it was saved to align the stack on
15456 iWMMXt. In case 1, restore IP into SP, otherwise just
15457 restore IP. */
15458 if (frame_pointer_needed)
15459 {
15460 live_regs_mask &= ~ (1 << IP_REGNUM);
15461 live_regs_mask |= (1 << SP_REGNUM);
15462 }
15463 else
15464 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
15465 }
15466
15467 /* On some ARM architectures it is faster to use LDR rather than
15468 LDM to load a single register. On other architectures, the
15469 cost is the same. In 26 bit mode, or for exception handlers,
15470 we have to use LDM to load the PC so that the CPSR is also
15471 restored. */
15472 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15473 if (live_regs_mask == (1U << reg))
15474 break;
15475
15476 if (reg <= LAST_ARM_REGNUM
15477 && (reg != LR_REGNUM
15478 || ! really_return
15479 || ! IS_INTERRUPT (func_type)))
15480 {
15481 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
15482 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
15483 }
15484 else
15485 {
15486 char *p;
15487 int first = 1;
15488
15489 /* Generate the load multiple instruction to restore the
15490 registers. Note we can get here, even if
15491 frame_pointer_needed is true, but only if sp already
15492 points to the base of the saved core registers. */
15493 if (live_regs_mask & (1 << SP_REGNUM))
15494 {
15495 unsigned HOST_WIDE_INT stack_adjust;
15496
15497 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
15498 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
15499
15500 if (stack_adjust && arm_arch5 && TARGET_ARM)
15501 if (TARGET_UNIFIED_ASM)
15502 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
15503 else
15504 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
15505 else
15506 {
15507 /* If we can't use ldmib (SA110 bug),
15508 then try to pop r3 instead. */
15509 if (stack_adjust)
15510 live_regs_mask |= 1 << 3;
15511
15512 if (TARGET_UNIFIED_ASM)
15513 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
15514 else
15515 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
15516 }
15517 }
15518 else
15519 if (TARGET_UNIFIED_ASM)
15520 sprintf (instr, "pop%s\t{", conditional);
15521 else
15522 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
15523
15524 p = instr + strlen (instr);
15525
15526 for (reg = 0; reg <= SP_REGNUM; reg++)
15527 if (live_regs_mask & (1 << reg))
15528 {
15529 int l = strlen (reg_names[reg]);
15530
15531 if (first)
15532 first = 0;
15533 else
15534 {
15535 memcpy (p, ", ", 2);
15536 p += 2;
15537 }
15538
15539 memcpy (p, "%|", 2);
15540 memcpy (p + 2, reg_names[reg], l);
15541 p += l + 2;
15542 }
15543
15544 if (live_regs_mask & (1 << LR_REGNUM))
15545 {
15546 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
15547 /* If returning from an interrupt, restore the CPSR. */
15548 if (IS_INTERRUPT (func_type))
15549 strcat (p, "^");
15550 }
15551 else
15552 strcpy (p, "}");
15553 }
15554
15555 output_asm_insn (instr, & operand);
15556
15557 /* See if we need to generate an extra instruction to
15558 perform the actual function return. */
15559 if (really_return
15560 && func_type != ARM_FT_INTERWORKED
15561 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
15562 {
15563 /* The return has already been handled
15564 by loading the LR into the PC. */
15565 return "";
15566 }
15567 }
15568
15569 if (really_return)
15570 {
15571 switch ((int) ARM_FUNC_TYPE (func_type))
15572 {
15573 case ARM_FT_ISR:
15574 case ARM_FT_FIQ:
15575 /* ??? This is wrong for unified assembly syntax. */
15576 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
15577 break;
15578
15579 case ARM_FT_INTERWORKED:
15580 sprintf (instr, "bx%s\t%%|lr", conditional);
15581 break;
15582
15583 case ARM_FT_EXCEPTION:
15584 /* ??? This is wrong for unified assembly syntax. */
15585 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
15586 break;
15587
15588 default:
15589 /* Use bx if it's available. */
15590 if (arm_arch5 || arm_arch4t)
15591 sprintf (instr, "bx%s\t%%|lr", conditional);
15592 else
15593 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
15594 break;
15595 }
15596
15597 output_asm_insn (instr, & operand);
15598 }
15599
15600 return "";
15601 }
15602
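/* Editor's note: illustrative return sequences produced by
   output_return_instruction above; the saved register sets are
   hypothetical.  A normal ARM function that pushed {r4, r5, lr} returns
   with

	ldmfd	sp!, {r4, r5, pc}

   (or "pop {r4, r5, pc}" in unified syntax); an interrupt handler whose
   saved registers do not include lr restores them with an ldm and then
   returns with

	subs	pc, lr, #4

   and a leaf function with nothing saved simply uses "bx lr" (or
   "mov pc, lr" before ARMv4T).  */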
15603 /* Write the function name into the code section, directly preceding
15604 the function prologue.
15605
15606 Code will be output similar to this:
15607 t0
15608 .ascii "arm_poke_function_name", 0
15609 .align
15610 t1
15611 .word 0xff000000 + (t1 - t0)
15612 arm_poke_function_name
15613 mov ip, sp
15614 stmfd sp!, {fp, ip, lr, pc}
15615 sub fp, ip, #4
15616
15617 When performing a stack backtrace, code can inspect the value
15618 of 'pc' stored at 'fp' + 0. If the trace function then looks
15619 at location pc - 12 and the top 8 bits are set, then we know
15620    that there is a function name embedded immediately preceding this
15621    location, whose length is given by ((pc[-3]) & 0x00ffffff).
15622
15623 We assume that pc is declared as a pointer to an unsigned long.
15624
15625 It is of no benefit to output the function name if we are assembling
15626 a leaf function. These function types will not contain a stack
15627 backtrace structure, therefore it is not possible to determine the
15628 function name. */
15629 void
15630 arm_poke_function_name (FILE *stream, const char *name)
15631 {
15632 unsigned long alignlength;
15633 unsigned long length;
15634 rtx x;
15635
15636 length = strlen (name) + 1;
15637 alignlength = ROUND_UP_WORD (length);
15638
15639 ASM_OUTPUT_ASCII (stream, name, length);
15640 ASM_OUTPUT_ALIGN (stream, 2);
15641 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
15642 assemble_aligned_integer (UNITS_PER_WORD, x);
15643 }
15644
15645 /* Place some comments into the assembler stream
15646 describing the current function. */
15647 static void
15648 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
15649 {
15650 unsigned long func_type;
15651
15652 /* ??? Do we want to print some of the below anyway? */
15653 if (TARGET_THUMB1)
15654 return;
15655
15656 /* Sanity check. */
15657 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
15658
15659 func_type = arm_current_func_type ();
15660
15661 switch ((int) ARM_FUNC_TYPE (func_type))
15662 {
15663 default:
15664 case ARM_FT_NORMAL:
15665 break;
15666 case ARM_FT_INTERWORKED:
15667 asm_fprintf (f, "\t%@ Function supports interworking.\n");
15668 break;
15669 case ARM_FT_ISR:
15670 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
15671 break;
15672 case ARM_FT_FIQ:
15673 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
15674 break;
15675 case ARM_FT_EXCEPTION:
15676 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
15677 break;
15678 }
15679
15680 if (IS_NAKED (func_type))
15681 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
15682
15683 if (IS_VOLATILE (func_type))
15684 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
15685
15686 if (IS_NESTED (func_type))
15687 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
15688 if (IS_STACKALIGN (func_type))
15689 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
15690
15691 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
15692 crtl->args.size,
15693 crtl->args.pretend_args_size, frame_size);
15694
15695 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
15696 frame_pointer_needed,
15697 cfun->machine->uses_anonymous_args);
15698
15699 if (cfun->machine->lr_save_eliminated)
15700 asm_fprintf (f, "\t%@ link register save eliminated.\n");
15701
15702 if (crtl->calls_eh_return)
15703 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
15704
15705 }
15706
15707 static void
15708 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
15709 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
15710 {
15711 arm_stack_offsets *offsets;
15712
15713 if (TARGET_THUMB1)
15714 {
15715 int regno;
15716
15717 /* Emit any call-via-reg trampolines that are needed for v4t support
15718 of call_reg and call_value_reg type insns. */
15719 for (regno = 0; regno < LR_REGNUM; regno++)
15720 {
15721 rtx label = cfun->machine->call_via[regno];
15722
15723 if (label != NULL)
15724 {
15725 switch_to_section (function_section (current_function_decl));
15726 targetm.asm_out.internal_label (asm_out_file, "L",
15727 CODE_LABEL_NUMBER (label));
15728 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
15729 }
15730 }
15731
15732 /* ??? Probably not safe to set this here, since it assumes that a
15733 function will be emitted as assembly immediately after we generate
15734 RTL for it. This does not happen for inline functions. */
15735 cfun->machine->return_used_this_function = 0;
15736 }
15737 else /* TARGET_32BIT */
15738 {
15739 /* We need to take into account any stack-frame rounding. */
15740 offsets = arm_get_frame_offsets ();
15741
15742 gcc_assert (!use_return_insn (FALSE, NULL)
15743 || (cfun->machine->return_used_this_function != 0)
15744 || offsets->saved_regs == offsets->outgoing_args
15745 || frame_pointer_needed);
15746
15747 /* Reset the ARM-specific per-function variables. */
15748 after_arm_reorg = 0;
15749 }
15750 }
15751
15752 /* Generate and emit an insn that we will recognize as a push_multi.
15753 Unfortunately, since this insn does not reflect very well the actual
15754 semantics of the operation, we need to annotate the insn for the benefit
15755 of DWARF2 frame unwind information. */
15756 static rtx
15757 emit_multi_reg_push (unsigned long mask)
15758 {
15759 int num_regs = 0;
15760 int num_dwarf_regs;
15761 int i, j;
15762 rtx par;
15763 rtx dwarf;
15764 int dwarf_par_index;
15765 rtx tmp, reg;
15766
15767 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15768 if (mask & (1 << i))
15769 num_regs++;
15770
15771 gcc_assert (num_regs && num_regs <= 16);
15772
15773 /* We don't record the PC in the dwarf frame information. */
15774 num_dwarf_regs = num_regs;
15775 if (mask & (1 << PC_REGNUM))
15776 num_dwarf_regs--;
15777
15778 /* For the body of the insn we are going to generate an UNSPEC in
15779 parallel with several USEs. This allows the insn to be recognized
15780 by the push_multi pattern in the arm.md file.
15781
15782 The body of the insn looks something like this:
15783
15784 (parallel [
15785 (set (mem:BLK (pre_modify:SI (reg:SI sp)
15786 (const_int:SI <num>)))
15787 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
15788 (use (reg:SI XX))
15789 (use (reg:SI YY))
15790 ...
15791 ])
15792
15793 For the frame note however, we try to be more explicit and actually
15794 show each register being stored into the stack frame, plus a (single)
15795 decrement of the stack pointer. We do it this way in order to be
15796 friendly to the stack unwinding code, which only wants to see a single
15797 stack decrement per instruction. The RTL we generate for the note looks
15798 something like this:
15799
15800 (sequence [
15801 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
15802 (set (mem:SI (reg:SI sp)) (reg:SI r4))
15803 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
15804 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
15805 ...
15806 ])
15807
15808 FIXME:: In an ideal world the PRE_MODIFY would not exist and
15809 instead we'd have a parallel expression detailing all
15810 the stores to the various memory addresses so that debug
15811 information is more up-to-date. Remember however while writing
15812 this to take care of the constraints with the push instruction.
15813
15814 Note also that this has to be taken care of for the VFP registers.
15815
15816 For more see PR43399. */
15817
15818 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
15819 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
15820 dwarf_par_index = 1;
15821
15822 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15823 {
15824 if (mask & (1 << i))
15825 {
15826 reg = gen_rtx_REG (SImode, i);
15827
15828 XVECEXP (par, 0, 0)
15829 = gen_rtx_SET (VOIDmode,
15830 gen_frame_mem
15831 (BLKmode,
15832 gen_rtx_PRE_MODIFY (Pmode,
15833 stack_pointer_rtx,
15834 plus_constant
15835 (Pmode, stack_pointer_rtx,
15836 -4 * num_regs))
15837 ),
15838 gen_rtx_UNSPEC (BLKmode,
15839 gen_rtvec (1, reg),
15840 UNSPEC_PUSH_MULT));
15841
15842 if (i != PC_REGNUM)
15843 {
15844 tmp = gen_rtx_SET (VOIDmode,
15845 gen_frame_mem (SImode, stack_pointer_rtx),
15846 reg);
15847 RTX_FRAME_RELATED_P (tmp) = 1;
15848 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
15849 dwarf_par_index++;
15850 }
15851
15852 break;
15853 }
15854 }
15855
15856 for (j = 1, i++; j < num_regs; i++)
15857 {
15858 if (mask & (1 << i))
15859 {
15860 reg = gen_rtx_REG (SImode, i);
15861
15862 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15863
15864 if (i != PC_REGNUM)
15865 {
15866 tmp
15867 = gen_rtx_SET (VOIDmode,
15868 gen_frame_mem
15869 (SImode,
15870 plus_constant (Pmode, stack_pointer_rtx,
15871 4 * j)),
15872 reg);
15873 RTX_FRAME_RELATED_P (tmp) = 1;
15874 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15875 }
15876
15877 j++;
15878 }
15879 }
15880
15881 par = emit_insn (par);
15882
15883 tmp = gen_rtx_SET (VOIDmode,
15884 stack_pointer_rtx,
15885 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
15886 RTX_FRAME_RELATED_P (tmp) = 1;
15887 XVECEXP (dwarf, 0, 0) = tmp;
15888
15889 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15890
15891 return par;
15892 }
15893
15894 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
15895 SAVED_REGS_MASK shows which registers need to be restored.
15896
15897 Unfortunately, since this insn does not reflect very well the actual
15898 semantics of the operation, we need to annotate the insn for the benefit
15899 of DWARF2 frame unwind information. */
15900 static void
15901 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
15902 {
15903 int num_regs = 0;
15904 int i, j;
15905 rtx par;
15906 rtx dwarf = NULL_RTX;
15907 rtx tmp, reg;
15908 bool return_in_pc;
15909 int offset_adj;
15910 int emit_update;
15911
15912 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
15913 offset_adj = return_in_pc ? 1 : 0;
15914 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15915 if (saved_regs_mask & (1 << i))
15916 num_regs++;
15917
15918 gcc_assert (num_regs && num_regs <= 16);
15919
15920 /* If SP is in reglist, then we don't emit SP update insn. */
15921 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
15922
15923 /* The parallel needs to hold num_regs SETs
15924 and one SET for the stack update. */
15925 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
15926
15927 if (return_in_pc)
15928 {
15929 tmp = ret_rtx;
15930 XVECEXP (par, 0, 0) = tmp;
15931 }
15932
15933 if (emit_update)
15934 {
15935 /* Increment the stack pointer, based on there being
15936 num_regs 4-byte registers to restore. */
15937 tmp = gen_rtx_SET (VOIDmode,
15938 stack_pointer_rtx,
15939 plus_constant (Pmode,
15940 stack_pointer_rtx,
15941 4 * num_regs));
15942 RTX_FRAME_RELATED_P (tmp) = 1;
15943 XVECEXP (par, 0, offset_adj) = tmp;
15944 }
15945
15946 /* Now restore every reg, which may include PC. */
15947 for (j = 0, i = 0; j < num_regs; i++)
15948 if (saved_regs_mask & (1 << i))
15949 {
15950 reg = gen_rtx_REG (SImode, i);
15951 tmp = gen_rtx_SET (VOIDmode,
15952 reg,
15953 gen_frame_mem
15954 (SImode,
15955 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
15956 RTX_FRAME_RELATED_P (tmp) = 1;
15957 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
15958
15959         /* We need to maintain a sequence for DWARF info too.  As the DWARF
15960            info should not include the PC, skip it.  */
15961 if (i != PC_REGNUM)
15962 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
15963
15964 j++;
15965 }
15966
15967 if (return_in_pc)
15968 par = emit_jump_insn (par);
15969 else
15970 par = emit_insn (par);
15971
15972 REG_NOTES (par) = dwarf;
15973 }
15974
15975 /* Generate and emit an insn pattern that we will recognize as a pop_multi
15976 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
15977
15978 Unfortunately, since this insn does not reflect very well the actual
15979 semantics of the operation, we need to annotate the insn for the benefit
15980 of DWARF2 frame unwind information. */
15981 static void
15982 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
15983 {
15984 int i, j;
15985 rtx par;
15986 rtx dwarf = NULL_RTX;
15987 rtx tmp, reg;
15988
15989 gcc_assert (num_regs && num_regs <= 32);
15990
15991 /* Workaround ARM10 VFPr1 bug. */
15992 if (num_regs == 2 && !arm_arch6)
15993 {
15994 if (first_reg == 15)
15995 first_reg--;
15996
15997 num_regs++;
15998 }
15999
16000 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
16001 there could be up to 32 D-registers to restore.
16002 If there are more than 16 D-registers, make two recursive calls,
16003 each of which emits one pop_multi instruction. */
16004 if (num_regs > 16)
16005 {
16006 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
16007 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
16008 return;
16009 }
16010
16011 /* The parallel needs to hold num_regs SETs
16012 and one SET for the stack update. */
16013 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
16014
16015 /* Increment the stack pointer, based on there being
16016 num_regs 8-byte registers to restore. */
16017 tmp = gen_rtx_SET (VOIDmode,
16018 base_reg,
16019 plus_constant (Pmode, base_reg, 8 * num_regs));
16020 RTX_FRAME_RELATED_P (tmp) = 1;
16021 XVECEXP (par, 0, 0) = tmp;
16022
16023 /* Now show every reg that will be restored, using a SET for each. */
16024 for (j = 0, i=first_reg; j < num_regs; i += 2)
16025 {
16026 reg = gen_rtx_REG (DFmode, i);
16027
16028 tmp = gen_rtx_SET (VOIDmode,
16029 reg,
16030 gen_frame_mem
16031 (DFmode,
16032 plus_constant (Pmode, base_reg, 8 * j)));
16033 RTX_FRAME_RELATED_P (tmp) = 1;
16034 XVECEXP (par, 0, j + 1) = tmp;
16035
16036 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
16037
16038 j++;
16039 }
16040
16041 par = emit_insn (par);
16042 REG_NOTES (par) = dwarf;
16043 }
16044
16045 /* Calculate the size of the return value that is passed in registers. */
16046 static unsigned
16047 arm_size_return_regs (void)
16048 {
16049 enum machine_mode mode;
16050
16051 if (crtl->return_rtx != 0)
16052 mode = GET_MODE (crtl->return_rtx);
16053 else
16054 mode = DECL_MODE (DECL_RESULT (current_function_decl));
16055
16056 return GET_MODE_SIZE (mode);
16057 }
16058
16059 /* Return true if the current function needs to save/restore LR. */
16060 static bool
16061 thumb_force_lr_save (void)
16062 {
16063 return !cfun->machine->lr_save_eliminated
16064 && (!leaf_function_p ()
16065 || thumb_far_jump_used_p ()
16066 || df_regs_ever_live_p (LR_REGNUM));
16067 }
16068
16069
16070 /* Return true if r3 is used by any of the tail call insns in the
16071 current function. */
16072 static bool
16073 any_sibcall_uses_r3 (void)
16074 {
16075 edge_iterator ei;
16076 edge e;
16077
16078 if (!crtl->tail_call_emit)
16079 return false;
16080 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
16081 if (e->flags & EDGE_SIBCALL)
16082 {
16083 rtx call = BB_END (e->src);
16084 if (!CALL_P (call))
16085 call = prev_nonnote_nondebug_insn (call);
16086 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
16087 if (find_regno_fusage (call, USE, 3))
16088 return true;
16089 }
16090 return false;
16091 }
16092
16093
16094 /* Compute the distance from register FROM to register TO.
16095 These can be the arg pointer (26), the soft frame pointer (25),
16096 the stack pointer (13) or the hard frame pointer (11).
16097 In thumb mode r7 is used as the soft frame pointer, if needed.
16098 Typical stack layout looks like this:
16099
16100 old stack pointer -> | |
16101 ----
16102 | | \
16103 | | saved arguments for
16104 | | vararg functions
16105 | | /
16106 --
16107 hard FP & arg pointer -> | | \
16108 | | stack
16109 | | frame
16110 | | /
16111 --
16112 | | \
16113 | | call saved
16114 | | registers
16115 soft frame pointer -> | | /
16116 --
16117 | | \
16118 | | local
16119 | | variables
16120 locals base pointer -> | | /
16121 --
16122 | | \
16123 | | outgoing
16124 | | arguments
16125 current stack pointer -> | | /
16126 --
16127
16128 For a given function some or all of these stack components
16129 may not be needed, giving rise to the possibility of
16130 eliminating some of the registers.
16131
16132 The values returned by this function must reflect the behavior
16133 of arm_expand_prologue() and arm_compute_save_reg_mask().
16134
16135 The sign of the number returned reflects the direction of stack
16136 growth, so the values are positive for all eliminations except
16137 from the soft frame pointer to the hard frame pointer.
16138
16139 SFP may point just inside the local variables block to ensure correct
16140 alignment. */
16141
16142
16143 /* Calculate stack offsets. These are used to calculate register elimination
16144 offsets and in prologue/epilogue code. Also calculates which registers
16145 should be saved. */
16146
16147 static arm_stack_offsets *
16148 arm_get_frame_offsets (void)
16149 {
16150 struct arm_stack_offsets *offsets;
16151 unsigned long func_type;
16152 int leaf;
16153 int saved;
16154 int core_saved;
16155 HOST_WIDE_INT frame_size;
16156 int i;
16157
16158 offsets = &cfun->machine->stack_offsets;
16159
16160 /* We need to know if we are a leaf function. Unfortunately, it
16161 is possible to be called after start_sequence has been called,
16162 which causes get_insns to return the insns for the sequence,
16163 not the function, which will cause leaf_function_p to return
16164 the incorrect result.
16165
16166      However, we only need to know about leaf functions once reload has completed, and the
16167 frame size cannot be changed after that time, so we can safely
16168 use the cached value. */
16169
16170 if (reload_completed)
16171 return offsets;
16172
16173   /* Initially this is the size of the local variables.  It will be translated
16174 into an offset once we have determined the size of preceding data. */
16175 frame_size = ROUND_UP_WORD (get_frame_size ());
16176
16177 leaf = leaf_function_p ();
16178
16179 /* Space for variadic functions. */
16180 offsets->saved_args = crtl->args.pretend_args_size;
16181
16182 /* In Thumb mode this is incorrect, but never used. */
16183 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
16184 arm_compute_static_chain_stack_bytes();
16185
16186 if (TARGET_32BIT)
16187 {
16188 unsigned int regno;
16189
16190 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
16191 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16192 saved = core_saved;
16193
16194 /* We know that SP will be doubleword aligned on entry, and we must
16195 preserve that condition at any subroutine call. We also require the
16196 soft frame pointer to be doubleword aligned. */
16197
16198 if (TARGET_REALLY_IWMMXT)
16199 {
16200 /* Check for the call-saved iWMMXt registers. */
16201 for (regno = FIRST_IWMMXT_REGNUM;
16202 regno <= LAST_IWMMXT_REGNUM;
16203 regno++)
16204 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
16205 saved += 8;
16206 }
16207
16208 func_type = arm_current_func_type ();
16209 /* Space for saved VFP registers. */
16210 if (! IS_VOLATILE (func_type)
16211 && TARGET_HARD_FLOAT && TARGET_VFP)
16212 saved += arm_get_vfp_saved_size ();
16213 }
16214 else /* TARGET_THUMB1 */
16215 {
16216 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
16217 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16218 saved = core_saved;
16219 if (TARGET_BACKTRACE)
16220 saved += 16;
16221 }
16222
16223 /* Saved registers include the stack frame. */
16224 offsets->saved_regs = offsets->saved_args + saved +
16225 arm_compute_static_chain_stack_bytes();
16226 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
16227 /* A leaf function does not need any stack alignment if it has nothing
16228 on the stack. */
16229 if (leaf && frame_size == 0
16230 /* However if it calls alloca(), we have a dynamically allocated
16231 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
16232 && ! cfun->calls_alloca)
16233 {
16234 offsets->outgoing_args = offsets->soft_frame;
16235 offsets->locals_base = offsets->soft_frame;
16236 return offsets;
16237 }
16238
16239 /* Ensure SFP has the correct alignment. */
16240 if (ARM_DOUBLEWORD_ALIGN
16241 && (offsets->soft_frame & 7))
16242 {
16243 offsets->soft_frame += 4;
16244 /* Try to align stack by pushing an extra reg. Don't bother doing this
16245 when there is a stack frame as the alignment will be rolled into
16246 the normal stack adjustment. */
16247 if (frame_size + crtl->outgoing_args_size == 0)
16248 {
16249 int reg = -1;
16250
16251 /* If it is safe to use r3, then do so. This sometimes
16252 generates better code on Thumb-2 by avoiding the need to
16253 use 32-bit push/pop instructions. */
16254 if (! any_sibcall_uses_r3 ()
16255 && arm_size_return_regs () <= 12
16256 && (offsets->saved_regs_mask & (1 << 3)) == 0)
16257 {
16258 reg = 3;
16259 }
16260 else
16261 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
16262 {
16263 /* Avoid fixed registers; they may be changed at
16264 arbitrary times so it's unsafe to restore them
16265 during the epilogue. */
16266 if (!fixed_regs[i]
16267 && (offsets->saved_regs_mask & (1 << i)) == 0)
16268 {
16269 reg = i;
16270 break;
16271 }
16272 }
16273
16274 if (reg != -1)
16275 {
16276 offsets->saved_regs += 4;
16277 offsets->saved_regs_mask |= (1 << reg);
16278 }
16279 }
16280 }
16281
16282 offsets->locals_base = offsets->soft_frame + frame_size;
16283 offsets->outgoing_args = (offsets->locals_base
16284 + crtl->outgoing_args_size);
16285
16286 if (ARM_DOUBLEWORD_ALIGN)
16287 {
16288 /* Ensure SP remains doubleword aligned. */
16289 if (offsets->outgoing_args & 7)
16290 offsets->outgoing_args += 4;
16291 gcc_assert (!(offsets->outgoing_args & 7));
16292 }
16293
16294 return offsets;
16295 }
16296
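/* Editor's note: a worked example of the offsets computed above, under the
   (hypothetical) assumptions of an ARM-mode function with no variadic
   arguments, no frame pointer, no interworking slot and no static chain
   slot, saving {r4, r5, r6, lr} and using 8 bytes of locals with no
   outgoing arguments:

     saved_args    = 0
     frame         = 0
     saved_regs    = 16   (four 4-byte core registers, already 8-aligned)
     soft_frame    = 16
     locals_base   = 24
     outgoing_args = 24   (already doubleword aligned)

   arm_compute_initial_elimination_offset below would then report the soft
   frame pointer as 24 - 16 = 8 bytes above the stack pointer.  */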
16297
16298 /* Calculate the relative offsets for the different stack pointers. Positive
16299 offsets are in the direction of stack growth. */
16300
16301 HOST_WIDE_INT
16302 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16303 {
16304 arm_stack_offsets *offsets;
16305
16306 offsets = arm_get_frame_offsets ();
16307
16308 /* OK, now we have enough information to compute the distances.
16309 There must be an entry in these switch tables for each pair
16310 of registers in ELIMINABLE_REGS, even if some of the entries
16311 seem to be redundant or useless. */
16312 switch (from)
16313 {
16314 case ARG_POINTER_REGNUM:
16315 switch (to)
16316 {
16317 case THUMB_HARD_FRAME_POINTER_REGNUM:
16318 return 0;
16319
16320 case FRAME_POINTER_REGNUM:
16321 /* This is the reverse of the soft frame pointer
16322 to hard frame pointer elimination below. */
16323 return offsets->soft_frame - offsets->saved_args;
16324
16325 case ARM_HARD_FRAME_POINTER_REGNUM:
16326 /* This is only non-zero in the case where the static chain register
16327 is stored above the frame. */
16328 return offsets->frame - offsets->saved_args - 4;
16329
16330 case STACK_POINTER_REGNUM:
16331 /* If nothing has been pushed on the stack at all
16332 then this will return -4. This *is* correct! */
16333 return offsets->outgoing_args - (offsets->saved_args + 4);
16334
16335 default:
16336 gcc_unreachable ();
16337 }
16338 gcc_unreachable ();
16339
16340 case FRAME_POINTER_REGNUM:
16341 switch (to)
16342 {
16343 case THUMB_HARD_FRAME_POINTER_REGNUM:
16344 return 0;
16345
16346 case ARM_HARD_FRAME_POINTER_REGNUM:
16347 /* The hard frame pointer points to the top entry in the
16348 stack frame. The soft frame pointer to the bottom entry
16349 in the stack frame. If there is no stack frame at all,
16350 then they are identical. */
16351
16352 return offsets->frame - offsets->soft_frame;
16353
16354 case STACK_POINTER_REGNUM:
16355 return offsets->outgoing_args - offsets->soft_frame;
16356
16357 default:
16358 gcc_unreachable ();
16359 }
16360 gcc_unreachable ();
16361
16362 default:
16363 /* You cannot eliminate from the stack pointer.
16364 In theory you could eliminate from the hard frame
16365 pointer to the stack pointer, but this will never
16366 happen, since if a stack frame is not needed the
16367 hard frame pointer will never be used. */
16368 gcc_unreachable ();
16369 }
16370 }
16371
16372 /* Given FROM and TO register numbers, say whether this elimination is
16373 allowed. Frame pointer elimination is automatically handled.
16374
16375 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
16376 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
16377 pointer, we must eliminate FRAME_POINTER_REGNUM into
16378 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
16379 ARG_POINTER_REGNUM. */
16380
16381 bool
16382 arm_can_eliminate (const int from, const int to)
16383 {
16384 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
16385 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
16386 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
16387 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
16388 true);
16389 }
16390
16391 /* Emit RTL to save coprocessor registers on function entry. Returns the
16392 number of bytes pushed. */
16393
16394 static int
16395 arm_save_coproc_regs(void)
16396 {
16397 int saved_size = 0;
16398 unsigned reg;
16399 unsigned start_reg;
16400 rtx insn;
16401
16402 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
16403 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
16404 {
16405 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16406 insn = gen_rtx_MEM (V2SImode, insn);
16407 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
16408 RTX_FRAME_RELATED_P (insn) = 1;
16409 saved_size += 8;
16410 }
16411
16412 if (TARGET_HARD_FLOAT && TARGET_VFP)
16413 {
16414 start_reg = FIRST_VFP_REGNUM;
16415
16416 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
16417 {
16418 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
16419 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
16420 {
16421 if (start_reg != reg)
16422 saved_size += vfp_emit_fstmd (start_reg,
16423 (reg - start_reg) / 2);
16424 start_reg = reg + 2;
16425 }
16426 }
16427 if (start_reg != reg)
16428 saved_size += vfp_emit_fstmd (start_reg,
16429 (reg - start_reg) / 2);
16430 }
16431 return saved_size;
16432 }
16433
16434
16435 /* Set the Thumb frame pointer from the stack pointer. */
16436
16437 static void
16438 thumb_set_frame_pointer (arm_stack_offsets *offsets)
16439 {
16440 HOST_WIDE_INT amount;
16441 rtx insn, dwarf;
16442
16443 amount = offsets->outgoing_args - offsets->locals_base;
16444 if (amount < 1024)
16445 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16446 stack_pointer_rtx, GEN_INT (amount)));
16447 else
16448 {
16449 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
16450 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
16451 expects the first two operands to be the same. */
16452 if (TARGET_THUMB2)
16453 {
16454 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16455 stack_pointer_rtx,
16456 hard_frame_pointer_rtx));
16457 }
16458 else
16459 {
16460 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16461 hard_frame_pointer_rtx,
16462 stack_pointer_rtx));
16463 }
16464 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
16465 plus_constant (Pmode, stack_pointer_rtx, amount));
16466 RTX_FRAME_RELATED_P (dwarf) = 1;
16467 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16468 }
16469
16470 RTX_FRAME_RELATED_P (insn) = 1;
16471 }
16472
16473 /* Generate the prologue instructions for entry into an ARM or Thumb-2
16474 function. */
16475 void
16476 arm_expand_prologue (void)
16477 {
16478 rtx amount;
16479 rtx insn;
16480 rtx ip_rtx;
16481 unsigned long live_regs_mask;
16482 unsigned long func_type;
16483 int fp_offset = 0;
16484 int saved_pretend_args = 0;
16485 int saved_regs = 0;
16486 unsigned HOST_WIDE_INT args_to_push;
16487 arm_stack_offsets *offsets;
16488
16489 func_type = arm_current_func_type ();
16490
16491 /* Naked functions don't have prologues. */
16492 if (IS_NAKED (func_type))
16493 return;
16494
16495   /* Make a copy of crtl->args.pretend_args_size, as we may need to modify it locally.  */
16496 args_to_push = crtl->args.pretend_args_size;
16497
16498   /* Compute which registers we will have to save onto the stack.  */
16499 offsets = arm_get_frame_offsets ();
16500 live_regs_mask = offsets->saved_regs_mask;
16501
16502 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
16503
16504 if (IS_STACKALIGN (func_type))
16505 {
16506 rtx r0, r1;
16507
16508 /* Handle a word-aligned stack pointer. We generate the following:
16509
16510 mov r0, sp
16511 bic r1, r0, #7
16512 mov sp, r1
16513 <save and restore r0 in normal prologue/epilogue>
16514 mov sp, r0
16515 bx lr
16516
16517 The unwinder doesn't need to know about the stack realignment.
16518 Just tell it we saved SP in r0. */
16519 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
16520
16521 r0 = gen_rtx_REG (SImode, 0);
16522 r1 = gen_rtx_REG (SImode, 1);
16523
16524 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
16525 RTX_FRAME_RELATED_P (insn) = 1;
16526 add_reg_note (insn, REG_CFA_REGISTER, NULL);
16527
16528 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
16529
16530 /* ??? The CFA changes here, which may cause GDB to conclude that it
16531 has entered a different function. That said, the unwind info is
16532 correct, individually, before and after this instruction because
16533 we've described the save of SP, which will override the default
16534 handling of SP as restoring from the CFA. */
16535 emit_insn (gen_movsi (stack_pointer_rtx, r1));
16536 }
16537
16538   /* For APCS frames, if the IP register is clobbered when
16539      creating the frame, save that register in a special
16540      way.  */
16541 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
16542 {
16543 if (IS_INTERRUPT (func_type))
16544 {
16545 	  /* Interrupt functions must not corrupt any registers.
16546 	     Creating a frame pointer, however, corrupts the IP
16547 	     register, so we must push it first.  */
16548 emit_multi_reg_push (1 << IP_REGNUM);
16549
16550 /* Do not set RTX_FRAME_RELATED_P on this insn.
16551 The dwarf stack unwinding code only wants to see one
16552 stack decrement per function, and this is not it. If
16553 this instruction is labeled as being part of the frame
16554 creation sequence then dwarf2out_frame_debug_expr will
16555 die when it encounters the assignment of IP to FP
16556 later on, since the use of SP here establishes SP as
16557 the CFA register and not IP.
16558
16559 Anyway this instruction is not really part of the stack
16560 frame creation although it is part of the prologue. */
16561 }
16562 else if (IS_NESTED (func_type))
16563 {
16564 	  /* The static chain register is the same as the IP register, which
16565 	     is used as a scratch register during stack frame creation.
16566 	     To get around this, we need to find somewhere to store IP
16567 	     whilst the frame is being created.  We try the following
16568 	     places in order:
16569
16570 1. The last argument register.
16571 2. A slot on the stack above the frame. (This only
16572 works if the function is not a varargs function).
16573 3. Register r3, after pushing the argument registers
16574 onto the stack.
16575
16576 Note - we only need to tell the dwarf2 backend about the SP
16577 adjustment in the second variant; the static chain register
16578 doesn't need to be unwound, as it doesn't contain a value
16579 inherited from the caller. */
16580
16581 if (df_regs_ever_live_p (3) == false)
16582 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16583 else if (args_to_push == 0)
16584 {
16585 rtx dwarf;
16586
16587 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
16588 saved_regs += 4;
16589
16590 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
16591 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
16592 fp_offset = 4;
16593
16594 /* Just tell the dwarf backend that we adjusted SP. */
16595 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16596 plus_constant (Pmode, stack_pointer_rtx,
16597 -fp_offset));
16598 RTX_FRAME_RELATED_P (insn) = 1;
16599 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16600 }
16601 else
16602 {
16603 /* Store the args on the stack. */
16604 if (cfun->machine->uses_anonymous_args)
16605 insn = emit_multi_reg_push
16606 ((0xf0 >> (args_to_push / 4)) & 0xf);
16607 else
16608 insn = emit_insn
16609 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16610 GEN_INT (- args_to_push)));
16611
16612 RTX_FRAME_RELATED_P (insn) = 1;
16613
16614 saved_pretend_args = 1;
16615 fp_offset = args_to_push;
16616 args_to_push = 0;
16617
16618 /* Now reuse r3 to preserve IP. */
16619 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16620 }
16621 }
16622
16623 insn = emit_set_insn (ip_rtx,
16624 plus_constant (Pmode, stack_pointer_rtx,
16625 fp_offset));
16626 RTX_FRAME_RELATED_P (insn) = 1;
16627 }
16628
16629 if (args_to_push)
16630 {
16631 /* Push the argument registers, or reserve space for them. */
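      /* Worked example (illustrative): with args_to_push == 8, i.e. two
	 anonymous argument registers, the mask is (0xf0 >> 2) & 0xf == 0xc,
	 so r2 and r3 are pushed.  */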
16632 if (cfun->machine->uses_anonymous_args)
16633 insn = emit_multi_reg_push
16634 ((0xf0 >> (args_to_push / 4)) & 0xf);
16635 else
16636 insn = emit_insn
16637 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16638 GEN_INT (- args_to_push)));
16639 RTX_FRAME_RELATED_P (insn) = 1;
16640 }
16641
16642   /* If this is an interrupt service routine, and the link register
16643      is going to be pushed, and we're not generating the extra push
16644      of IP (needed when a frame pointer is created with the APCS
16645      frame layout), then subtracting four from LR now will mean that
16646      the function return can be done with a single instruction.  */
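  /* Sketch of the effect (register set is only an example): if r4 and lr
     are the saved registers, storing lr - 4 here lets the epilogue return
     with a single
	ldmfd	sp!, {r4, pc}^
     instead of adjusting LR separately before returning.  */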
16647 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
16648 && (live_regs_mask & (1 << LR_REGNUM)) != 0
16649 && !(frame_pointer_needed && TARGET_APCS_FRAME)
16650 && TARGET_ARM)
16651 {
16652 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
16653
16654 emit_set_insn (lr, plus_constant (SImode, lr, -4));
16655 }
16656
16657 if (live_regs_mask)
16658 {
16659 saved_regs += bit_count (live_regs_mask) * 4;
16660 if (optimize_size && !frame_pointer_needed
16661 && saved_regs == offsets->saved_regs - offsets->saved_args)
16662 {
16663 	  /* If no coprocessor registers are being pushed and we don't have
16664 	     to worry about a frame pointer then push extra registers to
16665 	     create the stack frame.  This is done in a way that does not
16666 	     alter the frame layout, so is independent of the epilogue.  */
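	  /* Worked example (illustrative): if the local frame needs 8 bytes
	     and r0/r1 are dead here, then n becomes 2, r0 and r1 are added
	     to the push mask, and the explicit "sub sp, sp, #8" is avoided.  */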
16667 int n;
16668 int frame;
16669 n = 0;
16670 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
16671 n++;
16672 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
16673 if (frame && n * 4 >= frame)
16674 {
16675 n = frame / 4;
16676 live_regs_mask |= (1 << n) - 1;
16677 saved_regs += frame;
16678 }
16679 }
16680 insn = emit_multi_reg_push (live_regs_mask);
16681 RTX_FRAME_RELATED_P (insn) = 1;
16682 }
16683
16684 if (! IS_VOLATILE (func_type))
16685 saved_regs += arm_save_coproc_regs ();
16686
16687 if (frame_pointer_needed && TARGET_ARM)
16688 {
16689 /* Create the new frame pointer. */
16690 if (TARGET_APCS_FRAME)
16691 {
16692 insn = GEN_INT (-(4 + args_to_push + fp_offset));
16693 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
16694 RTX_FRAME_RELATED_P (insn) = 1;
16695
16696 if (IS_NESTED (func_type))
16697 {
16698 /* Recover the static chain register. */
16699 if (!df_regs_ever_live_p (3)
16700 || saved_pretend_args)
16701 insn = gen_rtx_REG (SImode, 3);
16702 else /* if (crtl->args.pretend_args_size == 0) */
16703 {
16704 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
16705 insn = gen_frame_mem (SImode, insn);
16706 }
16707 emit_set_insn (ip_rtx, insn);
16708 /* Add a USE to stop propagate_one_insn() from barfing. */
16709 emit_insn (gen_prologue_use (ip_rtx));
16710 }
16711 }
16712 else
16713 {
16714 insn = GEN_INT (saved_regs - 4);
16715 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16716 stack_pointer_rtx, insn));
16717 RTX_FRAME_RELATED_P (insn) = 1;
16718 }
16719 }
16720
16721 if (flag_stack_usage_info)
16722 current_function_static_stack_size
16723 = offsets->outgoing_args - offsets->saved_args;
16724
16725 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
16726 {
16727 /* This add can produce multiple insns for a large constant, so we
16728 need to get tricky. */
16729 rtx last = get_last_insn ();
16730
16731 amount = GEN_INT (offsets->saved_args + saved_regs
16732 - offsets->outgoing_args);
16733
16734 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16735 amount));
16736 do
16737 {
16738 last = last ? NEXT_INSN (last) : get_insns ();
16739 RTX_FRAME_RELATED_P (last) = 1;
16740 }
16741 while (last != insn);
16742
16743 /* If the frame pointer is needed, emit a special barrier that
16744 will prevent the scheduler from moving stores to the frame
16745 before the stack adjustment. */
16746 if (frame_pointer_needed)
16747 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
16748 hard_frame_pointer_rtx));
16749 }
16750
16751
16752 if (frame_pointer_needed && TARGET_THUMB2)
16753 thumb_set_frame_pointer (offsets);
16754
16755 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16756 {
16757 unsigned long mask;
16758
16759 mask = live_regs_mask;
16760 mask &= THUMB2_WORK_REGS;
16761 if (!IS_NESTED (func_type))
16762 mask |= (1 << IP_REGNUM);
16763 arm_load_pic_register (mask);
16764 }
16765
16766 /* If we are profiling, make sure no instructions are scheduled before
16767 the call to mcount. Similarly if the user has requested no
16768 scheduling in the prolog. Similarly if we want non-call exceptions
16769 using the EABI unwinder, to prevent faulting instructions from being
16770 swapped with a stack adjustment. */
16771 if (crtl->profile || !TARGET_SCHED_PROLOG
16772 || (arm_except_unwind_info (&global_options) == UI_TARGET
16773 && cfun->can_throw_non_call_exceptions))
16774 emit_insn (gen_blockage ());
16775
16776 /* If the link register is being kept alive, with the return address in it,
16777 then make sure that it does not get reused by the ce2 pass. */
16778 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
16779 cfun->machine->lr_save_eliminated = 1;
16780 }
16781 \f
16782 /* Print condition code to STREAM. Helper function for arm_print_operand. */
16783 static void
16784 arm_print_condition (FILE *stream)
16785 {
16786 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
16787 {
16788 /* Branch conversion is not implemented for Thumb-2. */
16789 if (TARGET_THUMB)
16790 {
16791 output_operand_lossage ("predicated Thumb instruction");
16792 return;
16793 }
16794 if (current_insn_predicate != NULL)
16795 {
16796 output_operand_lossage
16797 ("predicated instruction in conditional sequence");
16798 return;
16799 }
16800
16801 fputs (arm_condition_codes[arm_current_cc], stream);
16802 }
16803 else if (current_insn_predicate)
16804 {
16805 enum arm_cond_code code;
16806
16807 if (TARGET_THUMB1)
16808 {
16809 output_operand_lossage ("predicated Thumb instruction");
16810 return;
16811 }
16812
16813 code = get_arm_condition_code (current_insn_predicate);
16814 fputs (arm_condition_codes[code], stream);
16815 }
16816 }
16817
16818
16819 /* If CODE is 'd', then X is a condition operand and the instruction
16820    should only be executed if the condition is true.
16821    If CODE is 'D', then X is a condition operand and the instruction
16822    should only be executed if the condition is false: however, if the mode
16823    of the comparison is CCFPEmode, then always execute the instruction -- we
16824    do this because in these circumstances !GE does not necessarily imply LT;
16825    in these cases the instruction pattern will take care to make sure that
16826    an instruction containing %d will follow, thereby undoing the effects of
16827    doing this instruction unconditionally.
16828    If CODE is 'N' then X is a floating point operand that must be negated
16829    before output.
16830    If CODE is 'B' then output a bitwise inverted value of X (a const int).
16831    If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
16832 static void
16833 arm_print_operand (FILE *stream, rtx x, int code)
16834 {
16835 switch (code)
16836 {
16837 case '@':
16838 fputs (ASM_COMMENT_START, stream);
16839 return;
16840
16841 case '_':
16842 fputs (user_label_prefix, stream);
16843 return;
16844
16845 case '|':
16846 fputs (REGISTER_PREFIX, stream);
16847 return;
16848
16849 case '?':
16850 arm_print_condition (stream);
16851 return;
16852
16853 case '(':
16854 /* Nothing in unified syntax, otherwise the current condition code. */
16855 if (!TARGET_UNIFIED_ASM)
16856 arm_print_condition (stream);
16857 break;
16858
16859 case ')':
16860 /* The current condition code in unified syntax, otherwise nothing. */
16861 if (TARGET_UNIFIED_ASM)
16862 arm_print_condition (stream);
16863 break;
16864
16865 case '.':
16866 /* The current condition code for a condition code setting instruction.
16867 Preceded by 's' in unified syntax, otherwise followed by 's'. */
16868 if (TARGET_UNIFIED_ASM)
16869 {
16870 fputc('s', stream);
16871 arm_print_condition (stream);
16872 }
16873 else
16874 {
16875 arm_print_condition (stream);
16876 fputc('s', stream);
16877 }
16878 return;
16879
16880 case '!':
16881 /* If the instruction is conditionally executed then print
16882 the current condition code, otherwise print 's'. */
16883 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
16884 if (current_insn_predicate)
16885 arm_print_condition (stream);
16886 else
16887 fputc('s', stream);
16888 break;
16889
16890 /* %# is a "break" sequence. It doesn't output anything, but is used to
16891 separate e.g. operand numbers from following text, if that text consists
16892 of further digits which we don't want to be part of the operand
16893 number. */
16894 case '#':
16895 return;
16896
16897 case 'N':
16898 {
16899 REAL_VALUE_TYPE r;
16900 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16901 r = real_value_negate (&r);
16902 fprintf (stream, "%s", fp_const_from_val (&r));
16903 }
16904 return;
16905
16906 /* An integer or symbol address without a preceding # sign. */
16907 case 'c':
16908 switch (GET_CODE (x))
16909 {
16910 case CONST_INT:
16911 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16912 break;
16913
16914 case SYMBOL_REF:
16915 output_addr_const (stream, x);
16916 break;
16917
16918 case CONST:
16919 if (GET_CODE (XEXP (x, 0)) == PLUS
16920 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
16921 {
16922 output_addr_const (stream, x);
16923 break;
16924 }
16925 /* Fall through. */
16926
16927 default:
16928 output_operand_lossage ("Unsupported operand for code '%c'", code);
16929 }
16930 return;
16931
16932 /* An integer that we want to print in HEX. */
16933 case 'x':
16934 switch (GET_CODE (x))
16935 {
16936 case CONST_INT:
16937 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16938 break;
16939
16940 default:
16941 output_operand_lossage ("Unsupported operand for code '%c'", code);
16942 }
16943 return;
16944
16945 case 'B':
16946 if (CONST_INT_P (x))
16947 {
16948 HOST_WIDE_INT val;
16949 val = ARM_SIGN_EXTEND (~INTVAL (x));
16950 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
16951 }
16952 else
16953 {
16954 putc ('~', stream);
16955 output_addr_const (stream, x);
16956 }
16957 return;
16958
16959 case 'L':
16960 /* The low 16 bits of an immediate constant. */
16961 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
16962 return;
16963
16964 case 'i':
16965 fprintf (stream, "%s", arithmetic_instr (x, 1));
16966 return;
16967
16968 case 'I':
16969 fprintf (stream, "%s", arithmetic_instr (x, 0));
16970 return;
16971
16972 case 'S':
16973 {
16974 HOST_WIDE_INT val;
16975 const char *shift;
16976
16977 if (!shift_operator (x, SImode))
16978 {
16979 output_operand_lossage ("invalid shift operand");
16980 break;
16981 }
16982
16983 shift = shift_op (x, &val);
16984
16985 if (shift)
16986 {
16987 fprintf (stream, ", %s ", shift);
16988 if (val == -1)
16989 arm_print_operand (stream, XEXP (x, 1), 0);
16990 else
16991 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
16992 }
16993 }
16994 return;
16995
16996 /* An explanation of the 'Q', 'R' and 'H' register operands:
16997
16998 In a pair of registers containing a DI or DF value the 'Q'
16999 operand returns the register number of the register containing
17000 the least significant part of the value. The 'R' operand returns
17001 the register number of the register containing the most
17002 significant part of the value.
17003
17004 The 'H' operand returns the higher of the two register numbers.
17005 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
17006 same as the 'Q' operand, since the most significant part of the
17007 value is held in the lower number register. The reverse is true
17008 on systems where WORDS_BIG_ENDIAN is false.
17009
17010 The purpose of these operands is to distinguish between cases
17011 where the endian-ness of the values is important (for example
17012 when they are added together), and cases where the endian-ness
17013 is irrelevant, but the order of register operations is important.
17014 For example when loading a value from memory into a register
17015 pair, the endian-ness does not matter. Provided that the value
17016 from the lower memory address is put into the lower numbered
17017 register, and the value from the higher address is put into the
17018 higher numbered register, the load will work regardless of whether
17019 the value being loaded is big-wordian or little-wordian. The
17020 order of the two register loads can matter however, if the address
17021 of the memory location is actually held in one of the registers
17022 being overwritten by the load.
17023
17024 The 'Q' and 'R' constraints are also available for 64-bit
17025 constants. */
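    /* For illustration only: with a DImode value held in {r4, r5} on a
       little-endian target, %Q prints r4 (the least significant half),
       %R prints r5 (the most significant half) and %H prints r5 (the
       higher register number).  On a big-endian target %Q and %H both
       print r5, while %R prints r4.  */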
17026 case 'Q':
17027 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
17028 {
17029 rtx part = gen_lowpart (SImode, x);
17030 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17031 return;
17032 }
17033
17034 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17035 {
17036 output_operand_lossage ("invalid operand for code '%c'", code);
17037 return;
17038 }
17039
17040 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
17041 return;
17042
17043 case 'R':
17044 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
17045 {
17046 enum machine_mode mode = GET_MODE (x);
17047 rtx part;
17048
17049 if (mode == VOIDmode)
17050 mode = DImode;
17051 part = gen_highpart_mode (SImode, mode, x);
17052 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17053 return;
17054 }
17055
17056 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17057 {
17058 output_operand_lossage ("invalid operand for code '%c'", code);
17059 return;
17060 }
17061
17062 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
17063 return;
17064
17065 case 'H':
17066 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17067 {
17068 output_operand_lossage ("invalid operand for code '%c'", code);
17069 return;
17070 }
17071
17072 asm_fprintf (stream, "%r", REGNO (x) + 1);
17073 return;
17074
17075 case 'J':
17076 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17077 {
17078 output_operand_lossage ("invalid operand for code '%c'", code);
17079 return;
17080 }
17081
17082 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
17083 return;
17084
17085 case 'K':
17086 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17087 {
17088 output_operand_lossage ("invalid operand for code '%c'", code);
17089 return;
17090 }
17091
17092 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
17093 return;
17094
17095 case 'm':
17096 asm_fprintf (stream, "%r",
17097 REG_P (XEXP (x, 0))
17098 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
17099 return;
17100
17101 case 'M':
17102 asm_fprintf (stream, "{%r-%r}",
17103 REGNO (x),
17104 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
17105 return;
17106
17107 /* Like 'M', but writing doubleword vector registers, for use by Neon
17108 insns. */
17109 case 'h':
17110 {
17111 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
17112 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
17113 if (numregs == 1)
17114 asm_fprintf (stream, "{d%d}", regno);
17115 else
17116 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
17117 }
17118 return;
17119
17120 case 'd':
17121 /* CONST_TRUE_RTX means always -- that's the default. */
17122 if (x == const_true_rtx)
17123 return;
17124
17125 if (!COMPARISON_P (x))
17126 {
17127 output_operand_lossage ("invalid operand for code '%c'", code);
17128 return;
17129 }
17130
17131 fputs (arm_condition_codes[get_arm_condition_code (x)],
17132 stream);
17133 return;
17134
17135 case 'D':
17136 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
17137 want to do that. */
17138 if (x == const_true_rtx)
17139 {
17140 output_operand_lossage ("instruction never executed");
17141 return;
17142 }
17143 if (!COMPARISON_P (x))
17144 {
17145 output_operand_lossage ("invalid operand for code '%c'", code);
17146 return;
17147 }
17148
17149 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
17150 (get_arm_condition_code (x))],
17151 stream);
17152 return;
17153
17154 case 's':
17155 case 'V':
17156 case 'W':
17157 case 'X':
17158 case 'Y':
17159 case 'Z':
17160 /* Former Maverick support, removed after GCC-4.7. */
17161 output_operand_lossage ("obsolete Maverick format code '%c'", code);
17162 return;
17163
17164 case 'U':
17165 if (!REG_P (x)
17166 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
17167 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
17168 /* Bad value for wCG register number. */
17169 {
17170 output_operand_lossage ("invalid operand for code '%c'", code);
17171 return;
17172 }
17173
17174 else
17175 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
17176 return;
17177
17178 /* Print an iWMMXt control register name. */
17179 case 'w':
17180 if (!CONST_INT_P (x)
17181 || INTVAL (x) < 0
17182 || INTVAL (x) >= 16)
17183 /* Bad value for wC register number. */
17184 {
17185 output_operand_lossage ("invalid operand for code '%c'", code);
17186 return;
17187 }
17188
17189 else
17190 {
17191 static const char * wc_reg_names [16] =
17192 {
17193 "wCID", "wCon", "wCSSF", "wCASF",
17194 "wC4", "wC5", "wC6", "wC7",
17195 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
17196 "wC12", "wC13", "wC14", "wC15"
17197 };
17198
17199 	  fprintf (stream, "%s", wc_reg_names [INTVAL (x)]);
17200 }
17201 return;
17202
17203 /* Print the high single-precision register of a VFP double-precision
17204 register. */
17205 case 'p':
17206 {
17207 int mode = GET_MODE (x);
17208 int regno;
17209
17210 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
17211 {
17212 output_operand_lossage ("invalid operand for code '%c'", code);
17213 return;
17214 }
17215
17216 regno = REGNO (x);
17217 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
17218 {
17219 output_operand_lossage ("invalid operand for code '%c'", code);
17220 return;
17221 }
17222
17223 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
17224 }
17225 return;
17226
17227 /* Print a VFP/Neon double precision or quad precision register name. */
17228 case 'P':
17229 case 'q':
17230 {
17231 int mode = GET_MODE (x);
17232 int is_quad = (code == 'q');
17233 int regno;
17234
17235 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
17236 {
17237 output_operand_lossage ("invalid operand for code '%c'", code);
17238 return;
17239 }
17240
17241 if (!REG_P (x)
17242 || !IS_VFP_REGNUM (REGNO (x)))
17243 {
17244 output_operand_lossage ("invalid operand for code '%c'", code);
17245 return;
17246 }
17247
17248 regno = REGNO (x);
17249 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
17250 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
17251 {
17252 output_operand_lossage ("invalid operand for code '%c'", code);
17253 return;
17254 }
17255
17256 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
17257 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
17258 }
17259 return;
17260
17261 /* These two codes print the low/high doubleword register of a Neon quad
17262 register, respectively. For pair-structure types, can also print
17263 low/high quadword registers. */
17264 case 'e':
17265 case 'f':
17266 {
17267 int mode = GET_MODE (x);
17268 int regno;
17269
17270 if ((GET_MODE_SIZE (mode) != 16
17271 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
17272 {
17273 output_operand_lossage ("invalid operand for code '%c'", code);
17274 return;
17275 }
17276
17277 regno = REGNO (x);
17278 if (!NEON_REGNO_OK_FOR_QUAD (regno))
17279 {
17280 output_operand_lossage ("invalid operand for code '%c'", code);
17281 return;
17282 }
17283
17284 if (GET_MODE_SIZE (mode) == 16)
17285 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
17286 + (code == 'f' ? 1 : 0));
17287 else
17288 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
17289 + (code == 'f' ? 1 : 0));
17290 }
17291 return;
17292
17293 /* Print a VFPv3 floating-point constant, represented as an integer
17294 index. */
17295 case 'G':
17296 {
17297 int index = vfp3_const_double_index (x);
17298 gcc_assert (index != -1);
17299 fprintf (stream, "%d", index);
17300 }
17301 return;
17302
17303 /* Print bits representing opcode features for Neon.
17304
17305 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
17306 and polynomials as unsigned.
17307
17308 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
17309
17310 Bit 2 is 1 for rounding functions, 0 otherwise. */
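    /* For example (illustrative values): bits == 1 (a signed integer type)
       makes %T print 's' and %F print 'i'; bits == 3 (a float type) makes
       both print 'f'; and if bit 2 is also set, %O prints "r".  */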
17311
17312 /* Identify the type as 's', 'u', 'p' or 'f'. */
17313 case 'T':
17314 {
17315 HOST_WIDE_INT bits = INTVAL (x);
17316 fputc ("uspf"[bits & 3], stream);
17317 }
17318 return;
17319
17320 /* Likewise, but signed and unsigned integers are both 'i'. */
17321 case 'F':
17322 {
17323 HOST_WIDE_INT bits = INTVAL (x);
17324 fputc ("iipf"[bits & 3], stream);
17325 }
17326 return;
17327
17328 /* As for 'T', but emit 'u' instead of 'p'. */
17329 case 't':
17330 {
17331 HOST_WIDE_INT bits = INTVAL (x);
17332 fputc ("usuf"[bits & 3], stream);
17333 }
17334 return;
17335
17336 /* Bit 2: rounding (vs none). */
17337 case 'O':
17338 {
17339 HOST_WIDE_INT bits = INTVAL (x);
17340 fputs ((bits & 4) != 0 ? "r" : "", stream);
17341 }
17342 return;
17343
17344 /* Memory operand for vld1/vst1 instruction. */
17345 case 'A':
17346 {
17347 rtx addr;
17348 bool postinc = FALSE;
17349 unsigned align, memsize, align_bits;
17350
17351 gcc_assert (MEM_P (x));
17352 addr = XEXP (x, 0);
17353 if (GET_CODE (addr) == POST_INC)
17354 {
17355 postinc = 1;
17356 addr = XEXP (addr, 0);
17357 }
17358 asm_fprintf (stream, "[%r", REGNO (addr));
17359
17360 /* We know the alignment of this access, so we can emit a hint in the
17361 instruction (for some alignments) as an aid to the memory subsystem
17362 of the target. */
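	/* For instance (register is illustrative), a 16-byte access known to
	   be 16-byte aligned is printed as "[rN:128]", i.e. an alignment hint
	   of 128 bits.  */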
17363 align = MEM_ALIGN (x) >> 3;
17364 memsize = MEM_SIZE (x);
17365
17366 /* Only certain alignment specifiers are supported by the hardware. */
17367 if (memsize == 32 && (align % 32) == 0)
17368 align_bits = 256;
17369 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
17370 align_bits = 128;
17371 else if (memsize >= 8 && (align % 8) == 0)
17372 align_bits = 64;
17373 else
17374 align_bits = 0;
17375
17376 if (align_bits != 0)
17377 asm_fprintf (stream, ":%d", align_bits);
17378
17379 asm_fprintf (stream, "]");
17380
17381 if (postinc)
17382 fputs("!", stream);
17383 }
17384 return;
17385
17386 case 'C':
17387 {
17388 rtx addr;
17389
17390 gcc_assert (MEM_P (x));
17391 addr = XEXP (x, 0);
17392 gcc_assert (REG_P (addr));
17393 asm_fprintf (stream, "[%r]", REGNO (addr));
17394 }
17395 return;
17396
17397 /* Translate an S register number into a D register number and element index. */
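    /* E.g. (illustrative) s5 is printed as "d2[1]", since s5 is the odd
       half of d2.  */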
17398 case 'y':
17399 {
17400 int mode = GET_MODE (x);
17401 int regno;
17402
17403 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
17404 {
17405 output_operand_lossage ("invalid operand for code '%c'", code);
17406 return;
17407 }
17408
17409 regno = REGNO (x);
17410 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17411 {
17412 output_operand_lossage ("invalid operand for code '%c'", code);
17413 return;
17414 }
17415
17416 regno = regno - FIRST_VFP_REGNUM;
17417 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
17418 }
17419 return;
17420
17421 case 'v':
17422 gcc_assert (CONST_DOUBLE_P (x));
17423 fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
17424 return;
17425
17426 /* Register specifier for vld1.16/vst1.16. Translate the S register
17427 number into a D register number and element index. */
17428 case 'z':
17429 {
17430 int mode = GET_MODE (x);
17431 int regno;
17432
17433 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
17434 {
17435 output_operand_lossage ("invalid operand for code '%c'", code);
17436 return;
17437 }
17438
17439 regno = REGNO (x);
17440 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17441 {
17442 output_operand_lossage ("invalid operand for code '%c'", code);
17443 return;
17444 }
17445
17446 regno = regno - FIRST_VFP_REGNUM;
17447 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
17448 }
17449 return;
17450
17451 default:
17452 if (x == 0)
17453 {
17454 output_operand_lossage ("missing operand");
17455 return;
17456 }
17457
17458 switch (GET_CODE (x))
17459 {
17460 case REG:
17461 asm_fprintf (stream, "%r", REGNO (x));
17462 break;
17463
17464 case MEM:
17465 output_memory_reference_mode = GET_MODE (x);
17466 output_address (XEXP (x, 0));
17467 break;
17468
17469 case CONST_DOUBLE:
17470 if (TARGET_NEON)
17471 {
17472 char fpstr[20];
17473 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17474 sizeof (fpstr), 0, 1);
17475 fprintf (stream, "#%s", fpstr);
17476 }
17477 else
17478 fprintf (stream, "#%s", fp_immediate_constant (x));
17479 break;
17480
17481 default:
17482 gcc_assert (GET_CODE (x) != NEG);
17483 fputc ('#', stream);
17484 if (GET_CODE (x) == HIGH)
17485 {
17486 fputs (":lower16:", stream);
17487 x = XEXP (x, 0);
17488 }
17489
17490 output_addr_const (stream, x);
17491 break;
17492 }
17493 }
17494 }
17495 \f
17496 /* Target hook for printing a memory address. */
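/* A few examples of the addressing forms printed below (registers and
   offsets are illustrative): "[r3]", "[r3, #8]", "[r3, -r2]",
   "[r3, r2, lsl #2]", "[r3, #8]!" and "[r3], #8".  */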
17497 static void
17498 arm_print_operand_address (FILE *stream, rtx x)
17499 {
17500 if (TARGET_32BIT)
17501 {
17502 int is_minus = GET_CODE (x) == MINUS;
17503
17504 if (REG_P (x))
17505 asm_fprintf (stream, "[%r]", REGNO (x));
17506 else if (GET_CODE (x) == PLUS || is_minus)
17507 {
17508 rtx base = XEXP (x, 0);
17509 rtx index = XEXP (x, 1);
17510 HOST_WIDE_INT offset = 0;
17511 if (!REG_P (base)
17512 || (REG_P (index) && REGNO (index) == SP_REGNUM))
17513 {
17514 		  /* Ensure that BASE is a register
17515 		     (one of them must be).  Also ensure that SP is not
17516 		     used as an index register.  */
17517 rtx temp = base;
17518 base = index;
17519 index = temp;
17520 }
17521 switch (GET_CODE (index))
17522 {
17523 case CONST_INT:
17524 offset = INTVAL (index);
17525 if (is_minus)
17526 offset = -offset;
17527 asm_fprintf (stream, "[%r, #%wd]",
17528 REGNO (base), offset);
17529 break;
17530
17531 case REG:
17532 asm_fprintf (stream, "[%r, %s%r]",
17533 REGNO (base), is_minus ? "-" : "",
17534 REGNO (index));
17535 break;
17536
17537 case MULT:
17538 case ASHIFTRT:
17539 case LSHIFTRT:
17540 case ASHIFT:
17541 case ROTATERT:
17542 {
17543 asm_fprintf (stream, "[%r, %s%r",
17544 REGNO (base), is_minus ? "-" : "",
17545 REGNO (XEXP (index, 0)));
17546 arm_print_operand (stream, index, 'S');
17547 fputs ("]", stream);
17548 break;
17549 }
17550
17551 default:
17552 gcc_unreachable ();
17553 }
17554 }
17555 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
17556 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
17557 {
17558 extern enum machine_mode output_memory_reference_mode;
17559
17560 gcc_assert (REG_P (XEXP (x, 0)));
17561
17562 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
17563 asm_fprintf (stream, "[%r, #%s%d]!",
17564 REGNO (XEXP (x, 0)),
17565 GET_CODE (x) == PRE_DEC ? "-" : "",
17566 GET_MODE_SIZE (output_memory_reference_mode));
17567 else
17568 asm_fprintf (stream, "[%r], #%s%d",
17569 REGNO (XEXP (x, 0)),
17570 GET_CODE (x) == POST_DEC ? "-" : "",
17571 GET_MODE_SIZE (output_memory_reference_mode));
17572 }
17573 else if (GET_CODE (x) == PRE_MODIFY)
17574 {
17575 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
17576 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
17577 asm_fprintf (stream, "#%wd]!",
17578 INTVAL (XEXP (XEXP (x, 1), 1)));
17579 else
17580 asm_fprintf (stream, "%r]!",
17581 REGNO (XEXP (XEXP (x, 1), 1)));
17582 }
17583 else if (GET_CODE (x) == POST_MODIFY)
17584 {
17585 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
17586 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
17587 asm_fprintf (stream, "#%wd",
17588 INTVAL (XEXP (XEXP (x, 1), 1)));
17589 else
17590 asm_fprintf (stream, "%r",
17591 REGNO (XEXP (XEXP (x, 1), 1)));
17592 }
17593 else output_addr_const (stream, x);
17594 }
17595 else
17596 {
17597 if (REG_P (x))
17598 asm_fprintf (stream, "[%r]", REGNO (x));
17599 else if (GET_CODE (x) == POST_INC)
17600 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
17601 else if (GET_CODE (x) == PLUS)
17602 {
17603 gcc_assert (REG_P (XEXP (x, 0)));
17604 if (CONST_INT_P (XEXP (x, 1)))
17605 asm_fprintf (stream, "[%r, #%wd]",
17606 REGNO (XEXP (x, 0)),
17607 INTVAL (XEXP (x, 1)));
17608 else
17609 asm_fprintf (stream, "[%r, %r]",
17610 REGNO (XEXP (x, 0)),
17611 REGNO (XEXP (x, 1)));
17612 }
17613 else
17614 output_addr_const (stream, x);
17615 }
17616 }
17617 \f
17618 /* Target hook for indicating whether a punctuation character for
17619 TARGET_PRINT_OPERAND is valid. */
17620 static bool
17621 arm_print_operand_punct_valid_p (unsigned char code)
17622 {
17623 return (code == '@' || code == '|' || code == '.'
17624 || code == '(' || code == ')' || code == '#'
17625 || (TARGET_32BIT && (code == '?'))
17626 || (TARGET_THUMB2 && (code == '!'))
17627 || (TARGET_THUMB && (code == '_')));
17628 }
17629 \f
17630 /* Target hook for assembling integer objects. The ARM version needs to
17631 handle word-sized values specially. */
17632 static bool
17633 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
17634 {
17635 enum machine_mode mode;
17636
17637 if (size == UNITS_PER_WORD && aligned_p)
17638 {
17639 fputs ("\t.word\t", asm_out_file);
17640 output_addr_const (asm_out_file, x);
17641
17642 /* Mark symbols as position independent. We only do this in the
17643 .text segment, not in the .data segment. */
17644 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
17645 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
17646 {
17647 /* See legitimize_pic_address for an explanation of the
17648 TARGET_VXWORKS_RTP check. */
17649 if (TARGET_VXWORKS_RTP
17650 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
17651 fputs ("(GOT)", asm_out_file);
17652 else
17653 fputs ("(GOTOFF)", asm_out_file);
17654 }
17655 fputc ('\n', asm_out_file);
17656 return true;
17657 }
17658
17659 mode = GET_MODE (x);
17660
17661 if (arm_vector_mode_supported_p (mode))
17662 {
17663 int i, units;
17664
17665 gcc_assert (GET_CODE (x) == CONST_VECTOR);
17666
17667 units = CONST_VECTOR_NUNITS (x);
17668 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
17669
17670 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17671 for (i = 0; i < units; i++)
17672 {
17673 rtx elt = CONST_VECTOR_ELT (x, i);
17674 assemble_integer
17675 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
17676 }
17677 else
17678 for (i = 0; i < units; i++)
17679 {
17680 rtx elt = CONST_VECTOR_ELT (x, i);
17681 REAL_VALUE_TYPE rval;
17682
17683 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
17684
17685 assemble_real
17686 (rval, GET_MODE_INNER (mode),
17687 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
17688 }
17689
17690 return true;
17691 }
17692
17693 return default_assemble_integer (x, size, aligned_p);
17694 }
17695
17696 static void
17697 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
17698 {
17699 section *s;
17700
17701 if (!TARGET_AAPCS_BASED)
17702 {
17703 (is_ctor ?
17704 default_named_section_asm_out_constructor
17705 : default_named_section_asm_out_destructor) (symbol, priority);
17706 return;
17707 }
17708
17709 /* Put these in the .init_array section, using a special relocation. */
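  /* E.g. (illustrative) a constructor with priority 65 goes into a section
     named ".init_array.00065".  */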
17710 if (priority != DEFAULT_INIT_PRIORITY)
17711 {
17712 char buf[18];
17713 sprintf (buf, "%s.%.5u",
17714 is_ctor ? ".init_array" : ".fini_array",
17715 priority);
17716 s = get_section (buf, SECTION_WRITE, NULL_TREE);
17717 }
17718 else if (is_ctor)
17719 s = ctors_section;
17720 else
17721 s = dtors_section;
17722
17723 switch_to_section (s);
17724 assemble_align (POINTER_SIZE);
17725 fputs ("\t.word\t", asm_out_file);
17726 output_addr_const (asm_out_file, symbol);
17727 fputs ("(target1)\n", asm_out_file);
17728 }
17729
17730 /* Add a function to the list of static constructors. */
17731
17732 static void
17733 arm_elf_asm_constructor (rtx symbol, int priority)
17734 {
17735 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
17736 }
17737
17738 /* Add a function to the list of static destructors. */
17739
17740 static void
17741 arm_elf_asm_destructor (rtx symbol, int priority)
17742 {
17743 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
17744 }
17745 \f
17746 /* A finite state machine takes care of noticing whether or not instructions
17747    can be conditionally executed, and thus decreases execution time and code
17748    size by deleting branch instructions.  The fsm is controlled by
17749    final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */
17750
17751 /* The states of the fsm controlling condition codes are:
17752 0: normal, do nothing special
17753 1: make ASM_OUTPUT_OPCODE not output this instruction
17754 2: make ASM_OUTPUT_OPCODE not output this instruction
17755 3: make instructions conditional
17756 4: make instructions conditional
17757
17758 State transitions (state->state by whom under condition):
17759 0 -> 1 final_prescan_insn if the `target' is a label
17760 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
17761 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
17762 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
17763 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
17764 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
17765 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
17766 (the target insn is arm_target_insn).
17767
17768 If the jump clobbers the conditions then we use states 2 and 4.
17769
17770 A similar thing can be done with conditional return insns.
17771
17772 XXX In case the `target' is an unconditional branch, this conditionalising
17773 of the instructions always reduces code size, but not always execution
17774 time. But then, I want to reduce the code size to somewhere near what
17775 /bin/cc produces. */
17776
17777 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
17778 instructions. When a COND_EXEC instruction is seen the subsequent
17779 instructions are scanned so that multiple conditional instructions can be
17780 combined into a single IT block. arm_condexec_count and arm_condexec_mask
17781 specify the length and true/false mask for the IT block. These will be
17782 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
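/* For illustration: a block of three conditional instructions in which the
   first two use the block condition and the third uses its inverse ends up
   with arm_condexec_mask == 0x3 and arm_condexec_masklen == 3, and
   thumb2_asm_output_opcode prints "itte <cond>" before the first insn.  */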
17783
17784 /* Returns the index of the ARM condition code string in
17785 `arm_condition_codes', or ARM_NV if the comparison is invalid.
17786 COMPARISON should be an rtx like `(eq (...) (...))'. */
17787
17788 enum arm_cond_code
17789 maybe_get_arm_condition_code (rtx comparison)
17790 {
17791 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
17792 enum arm_cond_code code;
17793 enum rtx_code comp_code = GET_CODE (comparison);
17794
17795 if (GET_MODE_CLASS (mode) != MODE_CC)
17796 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
17797 XEXP (comparison, 1));
17798
17799 switch (mode)
17800 {
17801 case CC_DNEmode: code = ARM_NE; goto dominance;
17802 case CC_DEQmode: code = ARM_EQ; goto dominance;
17803 case CC_DGEmode: code = ARM_GE; goto dominance;
17804 case CC_DGTmode: code = ARM_GT; goto dominance;
17805 case CC_DLEmode: code = ARM_LE; goto dominance;
17806 case CC_DLTmode: code = ARM_LT; goto dominance;
17807 case CC_DGEUmode: code = ARM_CS; goto dominance;
17808 case CC_DGTUmode: code = ARM_HI; goto dominance;
17809 case CC_DLEUmode: code = ARM_LS; goto dominance;
17810 case CC_DLTUmode: code = ARM_CC;
17811
17812 dominance:
17813 if (comp_code == EQ)
17814 return ARM_INVERSE_CONDITION_CODE (code);
17815 if (comp_code == NE)
17816 return code;
17817 return ARM_NV;
17818
17819 case CC_NOOVmode:
17820 switch (comp_code)
17821 {
17822 case NE: return ARM_NE;
17823 case EQ: return ARM_EQ;
17824 case GE: return ARM_PL;
17825 case LT: return ARM_MI;
17826 default: return ARM_NV;
17827 }
17828
17829 case CC_Zmode:
17830 switch (comp_code)
17831 {
17832 case NE: return ARM_NE;
17833 case EQ: return ARM_EQ;
17834 default: return ARM_NV;
17835 }
17836
17837 case CC_Nmode:
17838 switch (comp_code)
17839 {
17840 case NE: return ARM_MI;
17841 case EQ: return ARM_PL;
17842 default: return ARM_NV;
17843 }
17844
17845 case CCFPEmode:
17846 case CCFPmode:
17847 /* We can handle all cases except UNEQ and LTGT. */
17848 switch (comp_code)
17849 {
17850 case GE: return ARM_GE;
17851 case GT: return ARM_GT;
17852 case LE: return ARM_LS;
17853 case LT: return ARM_MI;
17854 case NE: return ARM_NE;
17855 case EQ: return ARM_EQ;
17856 case ORDERED: return ARM_VC;
17857 case UNORDERED: return ARM_VS;
17858 case UNLT: return ARM_LT;
17859 case UNLE: return ARM_LE;
17860 case UNGT: return ARM_HI;
17861 case UNGE: return ARM_PL;
17862 /* UNEQ and LTGT do not have a representation. */
17863 case UNEQ: /* Fall through. */
17864 case LTGT: /* Fall through. */
17865 default: return ARM_NV;
17866 }
17867
17868 case CC_SWPmode:
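      /* The comparison operands were swapped when this CC mode was selected,
	 so each condition maps to its mirrored form, e.g. GT becomes LT and
	 GEU becomes LS.  */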
17869 switch (comp_code)
17870 {
17871 case NE: return ARM_NE;
17872 case EQ: return ARM_EQ;
17873 case GE: return ARM_LE;
17874 case GT: return ARM_LT;
17875 case LE: return ARM_GE;
17876 case LT: return ARM_GT;
17877 case GEU: return ARM_LS;
17878 case GTU: return ARM_CC;
17879 case LEU: return ARM_CS;
17880 case LTU: return ARM_HI;
17881 default: return ARM_NV;
17882 }
17883
17884 case CC_Cmode:
17885 switch (comp_code)
17886 {
17887 case LTU: return ARM_CS;
17888 case GEU: return ARM_CC;
17889 default: return ARM_NV;
17890 }
17891
17892 case CC_CZmode:
17893 switch (comp_code)
17894 {
17895 case NE: return ARM_NE;
17896 case EQ: return ARM_EQ;
17897 case GEU: return ARM_CS;
17898 case GTU: return ARM_HI;
17899 case LEU: return ARM_LS;
17900 case LTU: return ARM_CC;
17901 default: return ARM_NV;
17902 }
17903
17904 case CC_NCVmode:
17905 switch (comp_code)
17906 {
17907 case GE: return ARM_GE;
17908 case LT: return ARM_LT;
17909 case GEU: return ARM_CS;
17910 case LTU: return ARM_CC;
17911 default: return ARM_NV;
17912 }
17913
17914 case CCmode:
17915 switch (comp_code)
17916 {
17917 case NE: return ARM_NE;
17918 case EQ: return ARM_EQ;
17919 case GE: return ARM_GE;
17920 case GT: return ARM_GT;
17921 case LE: return ARM_LE;
17922 case LT: return ARM_LT;
17923 case GEU: return ARM_CS;
17924 case GTU: return ARM_HI;
17925 case LEU: return ARM_LS;
17926 case LTU: return ARM_CC;
17927 default: return ARM_NV;
17928 }
17929
17930 default: gcc_unreachable ();
17931 }
17932 }
17933
17934 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
17935 static enum arm_cond_code
17936 get_arm_condition_code (rtx comparison)
17937 {
17938 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
17939 gcc_assert (code != ARM_NV);
17940 return code;
17941 }
17942
17943 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
17944 instructions. */
17945 void
17946 thumb2_final_prescan_insn (rtx insn)
17947 {
17948 rtx first_insn = insn;
17949 rtx body = PATTERN (insn);
17950 rtx predicate;
17951 enum arm_cond_code code;
17952 int n;
17953 int mask;
17954
17955 /* Remove the previous insn from the count of insns to be output. */
17956 if (arm_condexec_count)
17957 arm_condexec_count--;
17958
17959 /* Nothing to do if we are already inside a conditional block. */
17960 if (arm_condexec_count)
17961 return;
17962
17963 if (GET_CODE (body) != COND_EXEC)
17964 return;
17965
17966 /* Conditional jumps are implemented directly. */
17967 if (JUMP_P (insn))
17968 return;
17969
17970 predicate = COND_EXEC_TEST (body);
17971 arm_current_cc = get_arm_condition_code (predicate);
17972
17973 n = get_attr_ce_count (insn);
17974 arm_condexec_count = 1;
17975 arm_condexec_mask = (1 << n) - 1;
17976 arm_condexec_masklen = n;
17977 /* See if subsequent instructions can be combined into the same block. */
17978 for (;;)
17979 {
17980 insn = next_nonnote_insn (insn);
17981
17982 /* Jumping into the middle of an IT block is illegal, so a label or
17983 barrier terminates the block. */
17984 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
17985 break;
17986
17987 body = PATTERN (insn);
17988 /* USE and CLOBBER aren't really insns, so just skip them. */
17989 if (GET_CODE (body) == USE
17990 || GET_CODE (body) == CLOBBER)
17991 continue;
17992
17993 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
17994 if (GET_CODE (body) != COND_EXEC)
17995 break;
17996 /* Allow up to 4 conditionally executed instructions in a block. */
17997 n = get_attr_ce_count (insn);
17998 if (arm_condexec_masklen + n > 4)
17999 break;
18000
18001 predicate = COND_EXEC_TEST (body);
18002 code = get_arm_condition_code (predicate);
18003 mask = (1 << n) - 1;
18004 if (arm_current_cc == code)
18005 arm_condexec_mask |= (mask << arm_condexec_masklen);
18006 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
18007 break;
18008
18009 arm_condexec_count++;
18010 arm_condexec_masklen += n;
18011
18012 /* A jump must be the last instruction in a conditional block. */
18013 if (JUMP_P (insn))
18014 break;
18015 }
18016 /* Restore recog_data (getting the attributes of other insns can
18017 destroy this array, but final.c assumes that it remains intact
18018 across this call). */
18019 extract_constrain_insn_cached (first_insn);
18020 }
18021
18022 void
18023 arm_final_prescan_insn (rtx insn)
18024 {
18025 /* BODY will hold the body of INSN. */
18026 rtx body = PATTERN (insn);
18027
18028   /* This will be 1 if we are trying to repeat the trick, and things need
18029      to be reversed if it appears to fail.  */
18030 int reverse = 0;
18031
18032 /* If we start with a return insn, we only succeed if we find another one. */
18033 int seeking_return = 0;
18034 enum rtx_code return_code = UNKNOWN;
18035
18036 /* START_INSN will hold the insn from where we start looking. This is the
18037 first insn after the following code_label if REVERSE is true. */
18038 rtx start_insn = insn;
18039
18040 /* If in state 4, check if the target branch is reached, in order to
18041 change back to state 0. */
18042 if (arm_ccfsm_state == 4)
18043 {
18044 if (insn == arm_target_insn)
18045 {
18046 arm_target_insn = NULL;
18047 arm_ccfsm_state = 0;
18048 }
18049 return;
18050 }
18051
18052 /* If in state 3, it is possible to repeat the trick, if this insn is an
18053 unconditional branch to a label, and immediately following this branch
18054 is the previous target label which is only used once, and the label this
18055 branch jumps to is not too far off. */
18056 if (arm_ccfsm_state == 3)
18057 {
18058 if (simplejump_p (insn))
18059 {
18060 start_insn = next_nonnote_insn (start_insn);
18061 if (BARRIER_P (start_insn))
18062 {
18063 /* XXX Isn't this always a barrier? */
18064 start_insn = next_nonnote_insn (start_insn);
18065 }
18066 if (LABEL_P (start_insn)
18067 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18068 && LABEL_NUSES (start_insn) == 1)
18069 reverse = TRUE;
18070 else
18071 return;
18072 }
18073 else if (ANY_RETURN_P (body))
18074 {
18075 start_insn = next_nonnote_insn (start_insn);
18076 if (BARRIER_P (start_insn))
18077 start_insn = next_nonnote_insn (start_insn);
18078 if (LABEL_P (start_insn)
18079 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18080 && LABEL_NUSES (start_insn) == 1)
18081 {
18082 reverse = TRUE;
18083 seeking_return = 1;
18084 return_code = GET_CODE (body);
18085 }
18086 else
18087 return;
18088 }
18089 else
18090 return;
18091 }
18092
18093 gcc_assert (!arm_ccfsm_state || reverse);
18094 if (!JUMP_P (insn))
18095 return;
18096
18097   /* This jump might be paralleled with a clobber of the condition codes;
18098      the jump should always come first.  */
18099 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
18100 body = XVECEXP (body, 0, 0);
18101
18102 if (reverse
18103 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
18104 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
18105 {
18106 int insns_skipped;
18107 int fail = FALSE, succeed = FALSE;
18108 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
18109 int then_not_else = TRUE;
18110 rtx this_insn = start_insn, label = 0;
18111
18112 /* Register the insn jumped to. */
18113 if (reverse)
18114 {
18115 if (!seeking_return)
18116 label = XEXP (SET_SRC (body), 0);
18117 }
18118 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
18119 label = XEXP (XEXP (SET_SRC (body), 1), 0);
18120 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
18121 {
18122 label = XEXP (XEXP (SET_SRC (body), 2), 0);
18123 then_not_else = FALSE;
18124 }
18125 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
18126 {
18127 seeking_return = 1;
18128 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
18129 }
18130 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
18131 {
18132 seeking_return = 1;
18133 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
18134 then_not_else = FALSE;
18135 }
18136 else
18137 gcc_unreachable ();
18138
18139 /* See how many insns this branch skips, and what kind of insns. If all
18140 insns are okay, and the label or unconditional branch to the same
18141 label is not too far away, succeed. */
18142 for (insns_skipped = 0;
18143 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
18144 {
18145 rtx scanbody;
18146
18147 this_insn = next_nonnote_insn (this_insn);
18148 if (!this_insn)
18149 break;
18150
18151 switch (GET_CODE (this_insn))
18152 {
18153 case CODE_LABEL:
18154 /* Succeed if it is the target label, otherwise fail since
18155 control falls in from somewhere else. */
18156 if (this_insn == label)
18157 {
18158 arm_ccfsm_state = 1;
18159 succeed = TRUE;
18160 }
18161 else
18162 fail = TRUE;
18163 break;
18164
18165 case BARRIER:
18166 /* Succeed if the following insn is the target label.
18167 Otherwise fail.
18168 If return insns are used then the last insn in a function
18169 will be a barrier. */
18170 this_insn = next_nonnote_insn (this_insn);
18171 if (this_insn && this_insn == label)
18172 {
18173 arm_ccfsm_state = 1;
18174 succeed = TRUE;
18175 }
18176 else
18177 fail = TRUE;
18178 break;
18179
18180 case CALL_INSN:
18181 /* The AAPCS says that conditional calls should not be
18182 used since they make interworking inefficient (the
18183 linker can't transform BL<cond> into BLX). That's
18184 only a problem if the machine has BLX. */
18185 if (arm_arch5)
18186 {
18187 fail = TRUE;
18188 break;
18189 }
18190
18191 /* Succeed if the following insn is the target label, or
18192 if the following two insns are a barrier and the
18193 target label. */
18194 this_insn = next_nonnote_insn (this_insn);
18195 if (this_insn && BARRIER_P (this_insn))
18196 this_insn = next_nonnote_insn (this_insn);
18197
18198 if (this_insn && this_insn == label
18199 && insns_skipped < max_insns_skipped)
18200 {
18201 arm_ccfsm_state = 1;
18202 succeed = TRUE;
18203 }
18204 else
18205 fail = TRUE;
18206 break;
18207
18208 case JUMP_INSN:
18209 /* If this is an unconditional branch to the same label, succeed.
18210 If it is to another label, do nothing. If it is conditional,
18211 fail. */
18212 /* XXX Probably, the tests for SET and the PC are
18213 unnecessary. */
18214
18215 scanbody = PATTERN (this_insn);
18216 if (GET_CODE (scanbody) == SET
18217 && GET_CODE (SET_DEST (scanbody)) == PC)
18218 {
18219 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
18220 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
18221 {
18222 arm_ccfsm_state = 2;
18223 succeed = TRUE;
18224 }
18225 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
18226 fail = TRUE;
18227 }
18228 /* Fail if a conditional return is undesirable (e.g. on a
18229 StrongARM), but still allow this if optimizing for size. */
18230 else if (GET_CODE (scanbody) == return_code
18231 && !use_return_insn (TRUE, NULL)
18232 && !optimize_size)
18233 fail = TRUE;
18234 else if (GET_CODE (scanbody) == return_code)
18235 {
18236 arm_ccfsm_state = 2;
18237 succeed = TRUE;
18238 }
18239 else if (GET_CODE (scanbody) == PARALLEL)
18240 {
18241 switch (get_attr_conds (this_insn))
18242 {
18243 case CONDS_NOCOND:
18244 break;
18245 default:
18246 fail = TRUE;
18247 break;
18248 }
18249 }
18250 else
18251 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
18252
18253 break;
18254
18255 case INSN:
18256 /* Instructions using or affecting the condition codes make it
18257 fail. */
18258 scanbody = PATTERN (this_insn);
18259 if (!(GET_CODE (scanbody) == SET
18260 || GET_CODE (scanbody) == PARALLEL)
18261 || get_attr_conds (this_insn) != CONDS_NOCOND)
18262 fail = TRUE;
18263 break;
18264
18265 default:
18266 break;
18267 }
18268 }
18269 if (succeed)
18270 {
18271 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
18272 arm_target_label = CODE_LABEL_NUMBER (label);
18273 else
18274 {
18275 gcc_assert (seeking_return || arm_ccfsm_state == 2);
18276
18277 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
18278 {
18279 this_insn = next_nonnote_insn (this_insn);
18280 gcc_assert (!this_insn
18281 || (!BARRIER_P (this_insn)
18282 && !LABEL_P (this_insn)));
18283 }
18284 if (!this_insn)
18285 {
18286 		  /* Oh, dear!  We ran off the end... give up.  */
18287 extract_constrain_insn_cached (insn);
18288 arm_ccfsm_state = 0;
18289 arm_target_insn = NULL;
18290 return;
18291 }
18292 arm_target_insn = this_insn;
18293 }
18294
18295 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
18296 what it was. */
18297 if (!reverse)
18298 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
18299
18300 if (reverse || then_not_else)
18301 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
18302 }
18303
18304 /* Restore recog_data (getting the attributes of other insns can
18305 destroy this array, but final.c assumes that it remains intact
18306      across this call).  */
18307 extract_constrain_insn_cached (insn);
18308 }
18309 }
18310
18311 /* Output IT instructions. */
18312 void
18313 thumb2_asm_output_opcode (FILE * stream)
18314 {
18315 char buff[5];
18316 int n;
18317
18318 if (arm_condexec_mask)
18319 {
18320 for (n = 0; n < arm_condexec_masklen; n++)
18321 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
18322 buff[n] = 0;
18323 asm_fprintf(stream, "i%s\t%s\n\t", buff,
18324 arm_condition_codes[arm_current_cc]);
18325 arm_condexec_mask = 0;
18326 }
18327 }
18328
18329 /* Returns true if REGNO is a valid register
18330 for holding a quantity of type MODE. */
18331 int
18332 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
18333 {
18334 if (GET_MODE_CLASS (mode) == MODE_CC)
18335 return (regno == CC_REGNUM
18336 || (TARGET_HARD_FLOAT && TARGET_VFP
18337 && regno == VFPCC_REGNUM));
18338
18339 if (TARGET_THUMB1)
18340     /* For the Thumb we only allow values bigger than SImode in
18341        registers 0 - 6, so that there is always a second low
18342        register available to hold the upper part of the value.
18343        We probably ought to ensure that the register is the
18344        start of an even numbered register pair.  */
18345 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
18346
18347 if (TARGET_HARD_FLOAT && TARGET_VFP
18348 && IS_VFP_REGNUM (regno))
18349 {
18350 if (mode == SFmode || mode == SImode)
18351 return VFP_REGNO_OK_FOR_SINGLE (regno);
18352
18353 if (mode == DFmode)
18354 return VFP_REGNO_OK_FOR_DOUBLE (regno);
18355
18356 /* VFP registers can hold HFmode values, but there is no point in
18357 putting them there unless we have hardware conversion insns. */
18358 if (mode == HFmode)
18359 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
18360
18361 if (TARGET_NEON)
18362 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
18363 || (VALID_NEON_QREG_MODE (mode)
18364 && NEON_REGNO_OK_FOR_QUAD (regno))
18365 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
18366 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
18367 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
18368 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
18369 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
18370
18371 return FALSE;
18372 }
18373
18374 if (TARGET_REALLY_IWMMXT)
18375 {
18376 if (IS_IWMMXT_GR_REGNUM (regno))
18377 return mode == SImode;
18378
18379 if (IS_IWMMXT_REGNUM (regno))
18380 return VALID_IWMMXT_REG_MODE (mode);
18381 }
18382
18383 /* We allow almost any value to be stored in the general registers.
18384 Restrict doubleword quantities to even register pairs so that we can
18385 use ldrd. Do not allow very large Neon structure opaque modes in
18386 general registers; they would use too many. */
18387 if (regno <= LAST_ARM_REGNUM)
18388 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
18389 && ARM_NUM_REGS (mode) <= 4;
18390
18391 if (regno == FRAME_POINTER_REGNUM
18392 || regno == ARG_POINTER_REGNUM)
18393 /* We only allow integers in the fake hard registers. */
18394 return GET_MODE_CLASS (mode) == MODE_INT;
18395
18396 return FALSE;
18397 }
18398
18399 /* Implement MODES_TIEABLE_P. */
18400
18401 bool
18402 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18403 {
18404 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
18405 return true;
18406
18407 /* We specifically want to allow elements of "structure" modes to
18408 be tieable to the structure. This more general condition allows
18409 other rarer situations too. */
18410 if (TARGET_NEON
18411 && (VALID_NEON_DREG_MODE (mode1)
18412 || VALID_NEON_QREG_MODE (mode1)
18413 || VALID_NEON_STRUCT_MODE (mode1))
18414 && (VALID_NEON_DREG_MODE (mode2)
18415 || VALID_NEON_QREG_MODE (mode2)
18416 || VALID_NEON_STRUCT_MODE (mode2)))
18417 return true;
18418
18419 return false;
18420 }
18421
18422 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
18423 not used in arm mode. */
18424
18425 enum reg_class
18426 arm_regno_class (int regno)
18427 {
18428 if (TARGET_THUMB1)
18429 {
18430 if (regno == STACK_POINTER_REGNUM)
18431 return STACK_REG;
18432 if (regno == CC_REGNUM)
18433 return CC_REG;
18434 if (regno < 8)
18435 return LO_REGS;
18436 return HI_REGS;
18437 }
18438
18439 if (TARGET_THUMB2 && regno < 8)
18440 return LO_REGS;
18441
18442 if ( regno <= LAST_ARM_REGNUM
18443 || regno == FRAME_POINTER_REGNUM
18444 || regno == ARG_POINTER_REGNUM)
18445 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
18446
18447 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
18448 return TARGET_THUMB2 ? CC_REG : NO_REGS;
18449
18450 if (IS_VFP_REGNUM (regno))
18451 {
18452 if (regno <= D7_VFP_REGNUM)
18453 return VFP_D0_D7_REGS;
18454 else if (regno <= LAST_LO_VFP_REGNUM)
18455 return VFP_LO_REGS;
18456 else
18457 return VFP_HI_REGS;
18458 }
18459
18460 if (IS_IWMMXT_REGNUM (regno))
18461 return IWMMXT_REGS;
18462
18463 if (IS_IWMMXT_GR_REGNUM (regno))
18464 return IWMMXT_GR_REGS;
18465
18466 return NO_REGS;
18467 }
18468
18469 /* Handle a special case when computing the offset
18470 of an argument from the frame pointer. */
18471 int
18472 arm_debugger_arg_offset (int value, rtx addr)
18473 {
18474 rtx insn;
18475
18476 /* We are only interested if dbxout_parms() failed to compute the offset. */
18477 if (value != 0)
18478 return 0;
18479
18480 /* We can only cope with the case where the address is held in a register. */
18481 if (!REG_P (addr))
18482 return 0;
18483
18484 /* If we are using the frame pointer to point at the argument, then
18485 an offset of 0 is correct. */
18486 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
18487 return 0;
18488
18489 /* If we are using the stack pointer to point at the
18490 argument, then an offset of 0 is correct. */
18491 /* ??? Check this is consistent with thumb2 frame layout. */
18492 if ((TARGET_THUMB || !frame_pointer_needed)
18493 && REGNO (addr) == SP_REGNUM)
18494 return 0;
18495
18496 /* Oh dear. The argument is pointed to by a register rather
18497 than being held in a register, or being stored at a known
18498 offset from the frame pointer. Since GDB only understands
18499 those two kinds of argument we must translate the address
18500 held in the register into an offset from the frame pointer.
18501 We do this by searching through the insns for the function
18502 looking to see where this register gets its value. If the
18503 register is initialized from the frame pointer plus an offset
18504 then we are in luck and we can continue, otherwise we give up.
18505
18506 This code is exercised by producing debugging information
18507 for a function with arguments like this:
18508
18509 double func (double a, double b, int c, double d) {return d;}
18510
18511 Without this code the stab for parameter 'd' will be set to
18512 an offset of 0 from the frame pointer, rather than 8. */
18513
18514 /* The if() statement says:
18515
18516 If the insn is a normal instruction
18517 and if the insn is setting the value in a register
18518 and if the register being set is the register holding the address of the argument
18519 and if the address is computed by an addition
18520 that involves adding to a register
18521 which is the frame pointer
18522 a constant integer
18523
18524 then... */
18525
18526 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18527 {
18528 if ( NONJUMP_INSN_P (insn)
18529 && GET_CODE (PATTERN (insn)) == SET
18530 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
18531 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
18532 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
18533 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
18534 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
18535 )
18536 {
18537 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
18538
18539 break;
18540 }
18541 }
18542
18543 if (value == 0)
18544 {
18545 debug_rtx (addr);
18546 warning (0, "unable to compute real location of stacked parameter");
18547 value = 8; /* XXX magic hack */
18548 }
18549
18550 return value;
18551 }
18552 \f
18553 typedef enum {
18554 T_V8QI,
18555 T_V4HI,
18556 T_V2SI,
18557 T_V2SF,
18558 T_DI,
18559 T_V16QI,
18560 T_V8HI,
18561 T_V4SI,
18562 T_V4SF,
18563 T_V2DI,
18564 T_TI,
18565 T_EI,
18566 T_OI,
18567 T_MAX /* Size of enum. Keep last. */
18568 } neon_builtin_type_mode;
18569
18570 #define TYPE_MODE_BIT(X) (1 << (X))
18571
18572 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
18573 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
18574 | TYPE_MODE_BIT (T_DI))
18575 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
18576 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
18577 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
18578
18579 #define v8qi_UP T_V8QI
18580 #define v4hi_UP T_V4HI
18581 #define v2si_UP T_V2SI
18582 #define v2sf_UP T_V2SF
18583 #define di_UP T_DI
18584 #define v16qi_UP T_V16QI
18585 #define v8hi_UP T_V8HI
18586 #define v4si_UP T_V4SI
18587 #define v4sf_UP T_V4SF
18588 #define v2di_UP T_V2DI
18589 #define ti_UP T_TI
18590 #define ei_UP T_EI
18591 #define oi_UP T_OI
18592
18593 #define UP(X) X##_UP
18594
18595 typedef enum {
18596 NEON_BINOP,
18597 NEON_TERNOP,
18598 NEON_UNOP,
18599 NEON_GETLANE,
18600 NEON_SETLANE,
18601 NEON_CREATE,
18602 NEON_DUP,
18603 NEON_DUPLANE,
18604 NEON_COMBINE,
18605 NEON_SPLIT,
18606 NEON_LANEMUL,
18607 NEON_LANEMULL,
18608 NEON_LANEMULH,
18609 NEON_LANEMAC,
18610 NEON_SCALARMUL,
18611 NEON_SCALARMULL,
18612 NEON_SCALARMULH,
18613 NEON_SCALARMAC,
18614 NEON_CONVERT,
18615 NEON_FIXCONV,
18616 NEON_SELECT,
18617 NEON_RESULTPAIR,
18618 NEON_REINTERP,
18619 NEON_VTBL,
18620 NEON_VTBX,
18621 NEON_LOAD1,
18622 NEON_LOAD1LANE,
18623 NEON_STORE1,
18624 NEON_STORE1LANE,
18625 NEON_LOADSTRUCT,
18626 NEON_LOADSTRUCTLANE,
18627 NEON_STORESTRUCT,
18628 NEON_STORESTRUCTLANE,
18629 NEON_LOGICBINOP,
18630 NEON_SHIFTINSERT,
18631 NEON_SHIFTIMM,
18632 NEON_SHIFTACC
18633 } neon_itype;
18634
18635 typedef struct {
18636 const char *name;
18637 const neon_itype itype;
18638 const neon_builtin_type_mode mode;
18639 const enum insn_code code;
18640 unsigned int fcode;
18641 } neon_builtin_datum;
18642
18643 #define CF(N,X) CODE_FOR_neon_##N##X
18644
18645 #define VAR1(T, N, A) \
18646 {#N, NEON_##T, UP (A), CF (N, A), 0}
18647 #define VAR2(T, N, A, B) \
18648 VAR1 (T, N, A), \
18649 {#N, NEON_##T, UP (B), CF (N, B), 0}
18650 #define VAR3(T, N, A, B, C) \
18651 VAR2 (T, N, A, B), \
18652 {#N, NEON_##T, UP (C), CF (N, C), 0}
18653 #define VAR4(T, N, A, B, C, D) \
18654 VAR3 (T, N, A, B, C), \
18655 {#N, NEON_##T, UP (D), CF (N, D), 0}
18656 #define VAR5(T, N, A, B, C, D, E) \
18657 VAR4 (T, N, A, B, C, D), \
18658 {#N, NEON_##T, UP (E), CF (N, E), 0}
18659 #define VAR6(T, N, A, B, C, D, E, F) \
18660 VAR5 (T, N, A, B, C, D, E), \
18661 {#N, NEON_##T, UP (F), CF (N, F), 0}
18662 #define VAR7(T, N, A, B, C, D, E, F, G) \
18663 VAR6 (T, N, A, B, C, D, E, F), \
18664 {#N, NEON_##T, UP (G), CF (N, G), 0}
18665 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
18666 VAR7 (T, N, A, B, C, D, E, F, G), \
18667 {#N, NEON_##T, UP (H), CF (N, H), 0}
18668 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
18669 VAR8 (T, N, A, B, C, D, E, F, G, H), \
18670 {#N, NEON_##T, UP (I), CF (N, I), 0}
18671 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
18672 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
18673 {#N, NEON_##T, UP (J), CF (N, J), 0}
18674
18675 /* The mode entries in the following table correspond to the "key" type of the
18676 instruction variant, i.e. equivalent to that which would be specified after
18677 the assembler mnemonic, which usually refers to the last vector operand.
18678 (Signed/unsigned/polynomial types are not differentiated between though, and
18679 are all mapped onto the same mode for a given element size.) The modes
18680 listed per instruction should be the same as those defined for that
18681 instruction's pattern in neon.md. */
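/* To make the table below easier to read: a single entry such as
   VAR2 (BINOP, vqdmull, v4hi, v2si) expands, via VAR1, UP and CF, into
   roughly

     {"vqdmull", NEON_BINOP, T_V4HI, CODE_FOR_neon_vqdmullv4hi, 0},
     {"vqdmull", NEON_BINOP, T_V2SI, CODE_FOR_neon_vqdmullv2si, 0},

   i.e. one neon_builtin_datum per listed mode, with fcode filled in
   later by arm_init_neon_builtins.  */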
18682
18683 static neon_builtin_datum neon_builtin_data[] =
18684 {
18685 VAR10 (BINOP, vadd,
18686 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18687 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
18688 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
18689 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18690 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18691 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
18692 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18693 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18694 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
18695 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18696 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
18697 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
18698 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
18699 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
18700 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
18701 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
18702 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
18703 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
18704 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
18705 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
18706 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
18707 VAR2 (BINOP, vqdmull, v4hi, v2si),
18708 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18709 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18710 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18711 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
18712 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
18713 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
18714 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18715 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18716 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18717 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
18718 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18719 VAR10 (BINOP, vsub,
18720 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18721 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
18722 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
18723 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18724 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18725 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
18726 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18727 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18728 VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18729 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18730 VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18731 VAR2 (BINOP, vcage, v2sf, v4sf),
18732 VAR2 (BINOP, vcagt, v2sf, v4sf),
18733 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18734 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18735 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
18736 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18737 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
18738 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18739 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18740 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
18741 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18742 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18743 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
18744 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
18745 VAR2 (BINOP, vrecps, v2sf, v4sf),
18746 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
18747 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18748 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18749 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18750 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18751 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18752 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18753 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18754 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18755 VAR2 (UNOP, vcnt, v8qi, v16qi),
18756 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
18757 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
18758 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18759 /* FIXME: vget_lane supports more variants than this! */
18760 VAR10 (GETLANE, vget_lane,
18761 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18762 VAR10 (SETLANE, vset_lane,
18763 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18764 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
18765 VAR10 (DUP, vdup_n,
18766 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18767 VAR10 (DUPLANE, vdup_lane,
18768 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18769 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
18770 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
18771 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
18772 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
18773 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
18774 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
18775 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
18776 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18777 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18778 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
18779 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
18780 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18781 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
18782 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
18783 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18784 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18785 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
18786 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
18787 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18788 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
18789 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
18790 VAR10 (BINOP, vext,
18791 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18792 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18793 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
18794 VAR2 (UNOP, vrev16, v8qi, v16qi),
18795 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
18796 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
18797 VAR10 (SELECT, vbsl,
18798 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18799 VAR1 (VTBL, vtbl1, v8qi),
18800 VAR1 (VTBL, vtbl2, v8qi),
18801 VAR1 (VTBL, vtbl3, v8qi),
18802 VAR1 (VTBL, vtbl4, v8qi),
18803 VAR1 (VTBX, vtbx1, v8qi),
18804 VAR1 (VTBX, vtbx2, v8qi),
18805 VAR1 (VTBX, vtbx3, v8qi),
18806 VAR1 (VTBX, vtbx4, v8qi),
18807 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18808 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18809 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18810 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
18811 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
18812 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
18813 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
18814 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
18815 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
18816 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
18817 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
18818 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
18819 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
18820 VAR10 (LOAD1, vld1,
18821 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18822 VAR10 (LOAD1LANE, vld1_lane,
18823 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18824 VAR10 (LOAD1, vld1_dup,
18825 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18826 VAR10 (STORE1, vst1,
18827 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18828 VAR10 (STORE1LANE, vst1_lane,
18829 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18830 VAR9 (LOADSTRUCT,
18831 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18832 VAR7 (LOADSTRUCTLANE, vld2_lane,
18833 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18834 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
18835 VAR9 (STORESTRUCT, vst2,
18836 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18837 VAR7 (STORESTRUCTLANE, vst2_lane,
18838 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18839 VAR9 (LOADSTRUCT,
18840 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18841 VAR7 (LOADSTRUCTLANE, vld3_lane,
18842 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18843 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
18844 VAR9 (STORESTRUCT, vst3,
18845 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18846 VAR7 (STORESTRUCTLANE, vst3_lane,
18847 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18848 VAR9 (LOADSTRUCT, vld4,
18849 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18850 VAR7 (LOADSTRUCTLANE, vld4_lane,
18851 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18852 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
18853 VAR9 (STORESTRUCT, vst4,
18854 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18855 VAR7 (STORESTRUCTLANE, vst4_lane,
18856 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18857 VAR10 (LOGICBINOP, vand,
18858 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18859 VAR10 (LOGICBINOP, vorr,
18860 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18861 VAR10 (BINOP, veor,
18862 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18863 VAR10 (LOGICBINOP, vbic,
18864 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18865 VAR10 (LOGICBINOP, vorn,
18866 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
18867 };
18868
18869 #undef CF
18870 #undef VAR1
18871 #undef VAR2
18872 #undef VAR3
18873 #undef VAR4
18874 #undef VAR5
18875 #undef VAR6
18876 #undef VAR7
18877 #undef VAR8
18878 #undef VAR9
18879 #undef VAR10
18880
18881 /* Neon defines builtins from ARM_BUILTIN_NEON_BASE upwards, though they don't have
18882 symbolic names defined here (which would require too much duplication).
18883 FIXME? */
18884 enum arm_builtins
18885 {
18886 ARM_BUILTIN_GETWCGR0,
18887 ARM_BUILTIN_GETWCGR1,
18888 ARM_BUILTIN_GETWCGR2,
18889 ARM_BUILTIN_GETWCGR3,
18890
18891 ARM_BUILTIN_SETWCGR0,
18892 ARM_BUILTIN_SETWCGR1,
18893 ARM_BUILTIN_SETWCGR2,
18894 ARM_BUILTIN_SETWCGR3,
18895
18896 ARM_BUILTIN_WZERO,
18897
18898 ARM_BUILTIN_WAVG2BR,
18899 ARM_BUILTIN_WAVG2HR,
18900 ARM_BUILTIN_WAVG2B,
18901 ARM_BUILTIN_WAVG2H,
18902
18903 ARM_BUILTIN_WACCB,
18904 ARM_BUILTIN_WACCH,
18905 ARM_BUILTIN_WACCW,
18906
18907 ARM_BUILTIN_WMACS,
18908 ARM_BUILTIN_WMACSZ,
18909 ARM_BUILTIN_WMACU,
18910 ARM_BUILTIN_WMACUZ,
18911
18912 ARM_BUILTIN_WSADB,
18913 ARM_BUILTIN_WSADBZ,
18914 ARM_BUILTIN_WSADH,
18915 ARM_BUILTIN_WSADHZ,
18916
18917 ARM_BUILTIN_WALIGNI,
18918 ARM_BUILTIN_WALIGNR0,
18919 ARM_BUILTIN_WALIGNR1,
18920 ARM_BUILTIN_WALIGNR2,
18921 ARM_BUILTIN_WALIGNR3,
18922
18923 ARM_BUILTIN_TMIA,
18924 ARM_BUILTIN_TMIAPH,
18925 ARM_BUILTIN_TMIABB,
18926 ARM_BUILTIN_TMIABT,
18927 ARM_BUILTIN_TMIATB,
18928 ARM_BUILTIN_TMIATT,
18929
18930 ARM_BUILTIN_TMOVMSKB,
18931 ARM_BUILTIN_TMOVMSKH,
18932 ARM_BUILTIN_TMOVMSKW,
18933
18934 ARM_BUILTIN_TBCSTB,
18935 ARM_BUILTIN_TBCSTH,
18936 ARM_BUILTIN_TBCSTW,
18937
18938 ARM_BUILTIN_WMADDS,
18939 ARM_BUILTIN_WMADDU,
18940
18941 ARM_BUILTIN_WPACKHSS,
18942 ARM_BUILTIN_WPACKWSS,
18943 ARM_BUILTIN_WPACKDSS,
18944 ARM_BUILTIN_WPACKHUS,
18945 ARM_BUILTIN_WPACKWUS,
18946 ARM_BUILTIN_WPACKDUS,
18947
18948 ARM_BUILTIN_WADDB,
18949 ARM_BUILTIN_WADDH,
18950 ARM_BUILTIN_WADDW,
18951 ARM_BUILTIN_WADDSSB,
18952 ARM_BUILTIN_WADDSSH,
18953 ARM_BUILTIN_WADDSSW,
18954 ARM_BUILTIN_WADDUSB,
18955 ARM_BUILTIN_WADDUSH,
18956 ARM_BUILTIN_WADDUSW,
18957 ARM_BUILTIN_WSUBB,
18958 ARM_BUILTIN_WSUBH,
18959 ARM_BUILTIN_WSUBW,
18960 ARM_BUILTIN_WSUBSSB,
18961 ARM_BUILTIN_WSUBSSH,
18962 ARM_BUILTIN_WSUBSSW,
18963 ARM_BUILTIN_WSUBUSB,
18964 ARM_BUILTIN_WSUBUSH,
18965 ARM_BUILTIN_WSUBUSW,
18966
18967 ARM_BUILTIN_WAND,
18968 ARM_BUILTIN_WANDN,
18969 ARM_BUILTIN_WOR,
18970 ARM_BUILTIN_WXOR,
18971
18972 ARM_BUILTIN_WCMPEQB,
18973 ARM_BUILTIN_WCMPEQH,
18974 ARM_BUILTIN_WCMPEQW,
18975 ARM_BUILTIN_WCMPGTUB,
18976 ARM_BUILTIN_WCMPGTUH,
18977 ARM_BUILTIN_WCMPGTUW,
18978 ARM_BUILTIN_WCMPGTSB,
18979 ARM_BUILTIN_WCMPGTSH,
18980 ARM_BUILTIN_WCMPGTSW,
18981
18982 ARM_BUILTIN_TEXTRMSB,
18983 ARM_BUILTIN_TEXTRMSH,
18984 ARM_BUILTIN_TEXTRMSW,
18985 ARM_BUILTIN_TEXTRMUB,
18986 ARM_BUILTIN_TEXTRMUH,
18987 ARM_BUILTIN_TEXTRMUW,
18988 ARM_BUILTIN_TINSRB,
18989 ARM_BUILTIN_TINSRH,
18990 ARM_BUILTIN_TINSRW,
18991
18992 ARM_BUILTIN_WMAXSW,
18993 ARM_BUILTIN_WMAXSH,
18994 ARM_BUILTIN_WMAXSB,
18995 ARM_BUILTIN_WMAXUW,
18996 ARM_BUILTIN_WMAXUH,
18997 ARM_BUILTIN_WMAXUB,
18998 ARM_BUILTIN_WMINSW,
18999 ARM_BUILTIN_WMINSH,
19000 ARM_BUILTIN_WMINSB,
19001 ARM_BUILTIN_WMINUW,
19002 ARM_BUILTIN_WMINUH,
19003 ARM_BUILTIN_WMINUB,
19004
19005 ARM_BUILTIN_WMULUM,
19006 ARM_BUILTIN_WMULSM,
19007 ARM_BUILTIN_WMULUL,
19008
19009 ARM_BUILTIN_PSADBH,
19010 ARM_BUILTIN_WSHUFH,
19011
19012 ARM_BUILTIN_WSLLH,
19013 ARM_BUILTIN_WSLLW,
19014 ARM_BUILTIN_WSLLD,
19015 ARM_BUILTIN_WSRAH,
19016 ARM_BUILTIN_WSRAW,
19017 ARM_BUILTIN_WSRAD,
19018 ARM_BUILTIN_WSRLH,
19019 ARM_BUILTIN_WSRLW,
19020 ARM_BUILTIN_WSRLD,
19021 ARM_BUILTIN_WRORH,
19022 ARM_BUILTIN_WRORW,
19023 ARM_BUILTIN_WRORD,
19024 ARM_BUILTIN_WSLLHI,
19025 ARM_BUILTIN_WSLLWI,
19026 ARM_BUILTIN_WSLLDI,
19027 ARM_BUILTIN_WSRAHI,
19028 ARM_BUILTIN_WSRAWI,
19029 ARM_BUILTIN_WSRADI,
19030 ARM_BUILTIN_WSRLHI,
19031 ARM_BUILTIN_WSRLWI,
19032 ARM_BUILTIN_WSRLDI,
19033 ARM_BUILTIN_WRORHI,
19034 ARM_BUILTIN_WRORWI,
19035 ARM_BUILTIN_WRORDI,
19036
19037 ARM_BUILTIN_WUNPCKIHB,
19038 ARM_BUILTIN_WUNPCKIHH,
19039 ARM_BUILTIN_WUNPCKIHW,
19040 ARM_BUILTIN_WUNPCKILB,
19041 ARM_BUILTIN_WUNPCKILH,
19042 ARM_BUILTIN_WUNPCKILW,
19043
19044 ARM_BUILTIN_WUNPCKEHSB,
19045 ARM_BUILTIN_WUNPCKEHSH,
19046 ARM_BUILTIN_WUNPCKEHSW,
19047 ARM_BUILTIN_WUNPCKEHUB,
19048 ARM_BUILTIN_WUNPCKEHUH,
19049 ARM_BUILTIN_WUNPCKEHUW,
19050 ARM_BUILTIN_WUNPCKELSB,
19051 ARM_BUILTIN_WUNPCKELSH,
19052 ARM_BUILTIN_WUNPCKELSW,
19053 ARM_BUILTIN_WUNPCKELUB,
19054 ARM_BUILTIN_WUNPCKELUH,
19055 ARM_BUILTIN_WUNPCKELUW,
19056
19057 ARM_BUILTIN_WABSB,
19058 ARM_BUILTIN_WABSH,
19059 ARM_BUILTIN_WABSW,
19060
19061 ARM_BUILTIN_WADDSUBHX,
19062 ARM_BUILTIN_WSUBADDHX,
19063
19064 ARM_BUILTIN_WABSDIFFB,
19065 ARM_BUILTIN_WABSDIFFH,
19066 ARM_BUILTIN_WABSDIFFW,
19067
19068 ARM_BUILTIN_WADDCH,
19069 ARM_BUILTIN_WADDCW,
19070
19071 ARM_BUILTIN_WAVG4,
19072 ARM_BUILTIN_WAVG4R,
19073
19074 ARM_BUILTIN_WMADDSX,
19075 ARM_BUILTIN_WMADDUX,
19076
19077 ARM_BUILTIN_WMADDSN,
19078 ARM_BUILTIN_WMADDUN,
19079
19080 ARM_BUILTIN_WMULWSM,
19081 ARM_BUILTIN_WMULWUM,
19082
19083 ARM_BUILTIN_WMULWSMR,
19084 ARM_BUILTIN_WMULWUMR,
19085
19086 ARM_BUILTIN_WMULWL,
19087
19088 ARM_BUILTIN_WMULSMR,
19089 ARM_BUILTIN_WMULUMR,
19090
19091 ARM_BUILTIN_WQMULM,
19092 ARM_BUILTIN_WQMULMR,
19093
19094 ARM_BUILTIN_WQMULWM,
19095 ARM_BUILTIN_WQMULWMR,
19096
19097 ARM_BUILTIN_WADDBHUSM,
19098 ARM_BUILTIN_WADDBHUSL,
19099
19100 ARM_BUILTIN_WQMIABB,
19101 ARM_BUILTIN_WQMIABT,
19102 ARM_BUILTIN_WQMIATB,
19103 ARM_BUILTIN_WQMIATT,
19104
19105 ARM_BUILTIN_WQMIABBN,
19106 ARM_BUILTIN_WQMIABTN,
19107 ARM_BUILTIN_WQMIATBN,
19108 ARM_BUILTIN_WQMIATTN,
19109
19110 ARM_BUILTIN_WMIABB,
19111 ARM_BUILTIN_WMIABT,
19112 ARM_BUILTIN_WMIATB,
19113 ARM_BUILTIN_WMIATT,
19114
19115 ARM_BUILTIN_WMIABBN,
19116 ARM_BUILTIN_WMIABTN,
19117 ARM_BUILTIN_WMIATBN,
19118 ARM_BUILTIN_WMIATTN,
19119
19120 ARM_BUILTIN_WMIAWBB,
19121 ARM_BUILTIN_WMIAWBT,
19122 ARM_BUILTIN_WMIAWTB,
19123 ARM_BUILTIN_WMIAWTT,
19124
19125 ARM_BUILTIN_WMIAWBBN,
19126 ARM_BUILTIN_WMIAWBTN,
19127 ARM_BUILTIN_WMIAWTBN,
19128 ARM_BUILTIN_WMIAWTTN,
19129
19130 ARM_BUILTIN_WMERGE,
19131
19132 ARM_BUILTIN_THREAD_POINTER,
19133
19134 ARM_BUILTIN_NEON_BASE,
19135
19136 ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
19137 };
19138
19139 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
19140
19141 static void
19142 arm_init_neon_builtins (void)
19143 {
19144 unsigned int i, fcode;
19145 tree decl;
19146
19147 tree neon_intQI_type_node;
19148 tree neon_intHI_type_node;
19149 tree neon_polyQI_type_node;
19150 tree neon_polyHI_type_node;
19151 tree neon_intSI_type_node;
19152 tree neon_intDI_type_node;
19153 tree neon_float_type_node;
19154
19155 tree intQI_pointer_node;
19156 tree intHI_pointer_node;
19157 tree intSI_pointer_node;
19158 tree intDI_pointer_node;
19159 tree float_pointer_node;
19160
19161 tree const_intQI_node;
19162 tree const_intHI_node;
19163 tree const_intSI_node;
19164 tree const_intDI_node;
19165 tree const_float_node;
19166
19167 tree const_intQI_pointer_node;
19168 tree const_intHI_pointer_node;
19169 tree const_intSI_pointer_node;
19170 tree const_intDI_pointer_node;
19171 tree const_float_pointer_node;
19172
19173 tree V8QI_type_node;
19174 tree V4HI_type_node;
19175 tree V2SI_type_node;
19176 tree V2SF_type_node;
19177 tree V16QI_type_node;
19178 tree V8HI_type_node;
19179 tree V4SI_type_node;
19180 tree V4SF_type_node;
19181 tree V2DI_type_node;
19182
19183 tree intUQI_type_node;
19184 tree intUHI_type_node;
19185 tree intUSI_type_node;
19186 tree intUDI_type_node;
19187
19188 tree intEI_type_node;
19189 tree intOI_type_node;
19190 tree intCI_type_node;
19191 tree intXI_type_node;
19192
19193 tree V8QI_pointer_node;
19194 tree V4HI_pointer_node;
19195 tree V2SI_pointer_node;
19196 tree V2SF_pointer_node;
19197 tree V16QI_pointer_node;
19198 tree V8HI_pointer_node;
19199 tree V4SI_pointer_node;
19200 tree V4SF_pointer_node;
19201 tree V2DI_pointer_node;
19202
19203 tree void_ftype_pv8qi_v8qi_v8qi;
19204 tree void_ftype_pv4hi_v4hi_v4hi;
19205 tree void_ftype_pv2si_v2si_v2si;
19206 tree void_ftype_pv2sf_v2sf_v2sf;
19207 tree void_ftype_pdi_di_di;
19208 tree void_ftype_pv16qi_v16qi_v16qi;
19209 tree void_ftype_pv8hi_v8hi_v8hi;
19210 tree void_ftype_pv4si_v4si_v4si;
19211 tree void_ftype_pv4sf_v4sf_v4sf;
19212 tree void_ftype_pv2di_v2di_v2di;
19213
19214 tree reinterp_ftype_dreg[5][5];
19215 tree reinterp_ftype_qreg[5][5];
19216 tree dreg_types[5], qreg_types[5];
19217
19218 /* Create distinguished type nodes for NEON vector element types,
19219 and pointers to values of such types, so we can detect them later. */
19220 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19221 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19222 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19223 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19224 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
19225 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
19226 neon_float_type_node = make_node (REAL_TYPE);
19227 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
19228 layout_type (neon_float_type_node);
19229
19230 /* Define typedefs which exactly correspond to the modes we are basing vector
19231 types on. If you change these names you'll need to change
19232 the table used by arm_mangle_type too. */
19233 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
19234 "__builtin_neon_qi");
19235 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
19236 "__builtin_neon_hi");
19237 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
19238 "__builtin_neon_si");
19239 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
19240 "__builtin_neon_sf");
19241 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
19242 "__builtin_neon_di");
19243 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
19244 "__builtin_neon_poly8");
19245 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
19246 "__builtin_neon_poly16");
19247
19248 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
19249 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
19250 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
19251 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
19252 float_pointer_node = build_pointer_type (neon_float_type_node);
19253
19254 /* Next create constant-qualified versions of the above types. */
19255 const_intQI_node = build_qualified_type (neon_intQI_type_node,
19256 TYPE_QUAL_CONST);
19257 const_intHI_node = build_qualified_type (neon_intHI_type_node,
19258 TYPE_QUAL_CONST);
19259 const_intSI_node = build_qualified_type (neon_intSI_type_node,
19260 TYPE_QUAL_CONST);
19261 const_intDI_node = build_qualified_type (neon_intDI_type_node,
19262 TYPE_QUAL_CONST);
19263 const_float_node = build_qualified_type (neon_float_type_node,
19264 TYPE_QUAL_CONST);
19265
19266 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
19267 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
19268 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
19269 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
19270 const_float_pointer_node = build_pointer_type (const_float_node);
19271
19272 /* Now create vector types based on our NEON element types. */
19273 /* 64-bit vectors. */
19274 V8QI_type_node =
19275 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
19276 V4HI_type_node =
19277 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
19278 V2SI_type_node =
19279 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
19280 V2SF_type_node =
19281 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
19282 /* 128-bit vectors. */
19283 V16QI_type_node =
19284 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
19285 V8HI_type_node =
19286 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
19287 V4SI_type_node =
19288 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
19289 V4SF_type_node =
19290 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
19291 V2DI_type_node =
19292 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
19293
19294 /* Unsigned integer types for various mode sizes. */
19295 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
19296 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
19297 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
19298 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
19299
19300 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
19301 "__builtin_neon_uqi");
19302 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
19303 "__builtin_neon_uhi");
19304 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
19305 "__builtin_neon_usi");
19306 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
19307 "__builtin_neon_udi");
19308
19309 /* Opaque integer types for structures of vectors. */
19310 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
19311 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
19312 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
19313 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
19314
19315 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
19316 "__builtin_neon_ti");
19317 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
19318 "__builtin_neon_ei");
19319 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
19320 "__builtin_neon_oi");
19321 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
19322 "__builtin_neon_ci");
19323 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
19324 "__builtin_neon_xi");
19325
19326 /* Pointers to vector types. */
19327 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
19328 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
19329 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
19330 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
19331 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
19332 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
19333 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
19334 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
19335 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
19336
19337 /* Operations which return results as pairs. */
19338 void_ftype_pv8qi_v8qi_v8qi =
19339 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
19340 V8QI_type_node, NULL);
19341 void_ftype_pv4hi_v4hi_v4hi =
19342 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
19343 V4HI_type_node, NULL);
19344 void_ftype_pv2si_v2si_v2si =
19345 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
19346 V2SI_type_node, NULL);
19347 void_ftype_pv2sf_v2sf_v2sf =
19348 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
19349 V2SF_type_node, NULL);
19350 void_ftype_pdi_di_di =
19351 build_function_type_list (void_type_node, intDI_pointer_node,
19352 neon_intDI_type_node, neon_intDI_type_node, NULL);
19353 void_ftype_pv16qi_v16qi_v16qi =
19354 build_function_type_list (void_type_node, V16QI_pointer_node,
19355 V16QI_type_node, V16QI_type_node, NULL);
19356 void_ftype_pv8hi_v8hi_v8hi =
19357 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
19358 V8HI_type_node, NULL);
19359 void_ftype_pv4si_v4si_v4si =
19360 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
19361 V4SI_type_node, NULL);
19362 void_ftype_pv4sf_v4sf_v4sf =
19363 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
19364 V4SF_type_node, NULL);
19365 void_ftype_pv2di_v2di_v2di =
19366 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
19367 V2DI_type_node, NULL);
19368
19369 dreg_types[0] = V8QI_type_node;
19370 dreg_types[1] = V4HI_type_node;
19371 dreg_types[2] = V2SI_type_node;
19372 dreg_types[3] = V2SF_type_node;
19373 dreg_types[4] = neon_intDI_type_node;
19374
19375 qreg_types[0] = V16QI_type_node;
19376 qreg_types[1] = V8HI_type_node;
19377 qreg_types[2] = V4SI_type_node;
19378 qreg_types[3] = V4SF_type_node;
19379 qreg_types[4] = V2DI_type_node;
19380
19381 for (i = 0; i < 5; i++)
19382 {
19383 int j;
19384 for (j = 0; j < 5; j++)
19385 {
19386 reinterp_ftype_dreg[i][j]
19387 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
19388 reinterp_ftype_qreg[i][j]
19389 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
19390 }
19391 }
19392
19393 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
19394 i < ARRAY_SIZE (neon_builtin_data);
19395 i++, fcode++)
19396 {
19397 neon_builtin_datum *d = &neon_builtin_data[i];
19398
19399 const char* const modenames[] = {
19400 "v8qi", "v4hi", "v2si", "v2sf", "di",
19401 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
19402 "ti", "ei", "oi"
19403 };
19404 char namebuf[60];
19405 tree ftype = NULL;
19406 int is_load = 0, is_store = 0;
19407
19408 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
19409
19410 d->fcode = fcode;
19411
19412 switch (d->itype)
19413 {
19414 case NEON_LOAD1:
19415 case NEON_LOAD1LANE:
19416 case NEON_LOADSTRUCT:
19417 case NEON_LOADSTRUCTLANE:
19418 is_load = 1;
19419 /* Fall through. */
19420 case NEON_STORE1:
19421 case NEON_STORE1LANE:
19422 case NEON_STORESTRUCT:
19423 case NEON_STORESTRUCTLANE:
19424 if (!is_load)
19425 is_store = 1;
19426 /* Fall through. */
19427 case NEON_UNOP:
19428 case NEON_BINOP:
19429 case NEON_LOGICBINOP:
19430 case NEON_SHIFTINSERT:
19431 case NEON_TERNOP:
19432 case NEON_GETLANE:
19433 case NEON_SETLANE:
19434 case NEON_CREATE:
19435 case NEON_DUP:
19436 case NEON_DUPLANE:
19437 case NEON_SHIFTIMM:
19438 case NEON_SHIFTACC:
19439 case NEON_COMBINE:
19440 case NEON_SPLIT:
19441 case NEON_CONVERT:
19442 case NEON_FIXCONV:
19443 case NEON_LANEMUL:
19444 case NEON_LANEMULL:
19445 case NEON_LANEMULH:
19446 case NEON_LANEMAC:
19447 case NEON_SCALARMUL:
19448 case NEON_SCALARMULL:
19449 case NEON_SCALARMULH:
19450 case NEON_SCALARMAC:
19451 case NEON_SELECT:
19452 case NEON_VTBL:
19453 case NEON_VTBX:
19454 {
19455 int k;
19456 tree return_type = void_type_node, args = void_list_node;
19457
19458 /* Build a function type directly from the insn_data for
19459 this builtin. The build_function_type() function takes
19460 care of removing duplicates for us. */
19461 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
19462 {
19463 tree eltype;
19464
19465 if (is_load && k == 1)
19466 {
19467 /* Neon load patterns always have the memory
19468 operand in the operand 1 position. */
19469 gcc_assert (insn_data[d->code].operand[k].predicate
19470 == neon_struct_operand);
19471
19472 switch (d->mode)
19473 {
19474 case T_V8QI:
19475 case T_V16QI:
19476 eltype = const_intQI_pointer_node;
19477 break;
19478
19479 case T_V4HI:
19480 case T_V8HI:
19481 eltype = const_intHI_pointer_node;
19482 break;
19483
19484 case T_V2SI:
19485 case T_V4SI:
19486 eltype = const_intSI_pointer_node;
19487 break;
19488
19489 case T_V2SF:
19490 case T_V4SF:
19491 eltype = const_float_pointer_node;
19492 break;
19493
19494 case T_DI:
19495 case T_V2DI:
19496 eltype = const_intDI_pointer_node;
19497 break;
19498
19499 default: gcc_unreachable ();
19500 }
19501 }
19502 else if (is_store && k == 0)
19503 {
19504 /* Similarly, Neon store patterns use operand 0 as
19505 the memory location to store to. */
19506 gcc_assert (insn_data[d->code].operand[k].predicate
19507 == neon_struct_operand);
19508
19509 switch (d->mode)
19510 {
19511 case T_V8QI:
19512 case T_V16QI:
19513 eltype = intQI_pointer_node;
19514 break;
19515
19516 case T_V4HI:
19517 case T_V8HI:
19518 eltype = intHI_pointer_node;
19519 break;
19520
19521 case T_V2SI:
19522 case T_V4SI:
19523 eltype = intSI_pointer_node;
19524 break;
19525
19526 case T_V2SF:
19527 case T_V4SF:
19528 eltype = float_pointer_node;
19529 break;
19530
19531 case T_DI:
19532 case T_V2DI:
19533 eltype = intDI_pointer_node;
19534 break;
19535
19536 default: gcc_unreachable ();
19537 }
19538 }
19539 else
19540 {
19541 switch (insn_data[d->code].operand[k].mode)
19542 {
19543 case VOIDmode: eltype = void_type_node; break;
19544 /* Scalars. */
19545 case QImode: eltype = neon_intQI_type_node; break;
19546 case HImode: eltype = neon_intHI_type_node; break;
19547 case SImode: eltype = neon_intSI_type_node; break;
19548 case SFmode: eltype = neon_float_type_node; break;
19549 case DImode: eltype = neon_intDI_type_node; break;
19550 case TImode: eltype = intTI_type_node; break;
19551 case EImode: eltype = intEI_type_node; break;
19552 case OImode: eltype = intOI_type_node; break;
19553 case CImode: eltype = intCI_type_node; break;
19554 case XImode: eltype = intXI_type_node; break;
19555 /* 64-bit vectors. */
19556 case V8QImode: eltype = V8QI_type_node; break;
19557 case V4HImode: eltype = V4HI_type_node; break;
19558 case V2SImode: eltype = V2SI_type_node; break;
19559 case V2SFmode: eltype = V2SF_type_node; break;
19560 /* 128-bit vectors. */
19561 case V16QImode: eltype = V16QI_type_node; break;
19562 case V8HImode: eltype = V8HI_type_node; break;
19563 case V4SImode: eltype = V4SI_type_node; break;
19564 case V4SFmode: eltype = V4SF_type_node; break;
19565 case V2DImode: eltype = V2DI_type_node; break;
19566 default: gcc_unreachable ();
19567 }
19568 }
19569
19570 if (k == 0 && !is_store)
19571 return_type = eltype;
19572 else
19573 args = tree_cons (NULL_TREE, eltype, args);
19574 }
19575
19576 ftype = build_function_type (return_type, args);
19577 }
19578 break;
19579
19580 case NEON_RESULTPAIR:
19581 {
19582 switch (insn_data[d->code].operand[1].mode)
19583 {
19584 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
19585 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
19586 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
19587 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
19588 case DImode: ftype = void_ftype_pdi_di_di; break;
19589 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
19590 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
19591 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
19592 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
19593 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
19594 default: gcc_unreachable ();
19595 }
19596 }
19597 break;
19598
19599 case NEON_REINTERP:
19600 {
19601 /* We iterate over 5 doubleword types, then 5 quadword
19602 types. */
19603 int rhs = d->mode % 5;
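/* Worked example (assuming the usual neon.md operand modes): the
   vreinterpretv4hi variant with key mode T_V2SI gives rhs == 2, and an
   operand 0 mode of V4HImode then selects reinterp_ftype_dreg[1][2],
   i.e. a function taking a V2SI vector and returning a V4HI vector.  */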
19604 switch (insn_data[d->code].operand[0].mode)
19605 {
19606 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
19607 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
19608 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
19609 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
19610 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
19611 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
19612 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
19613 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
19614 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
19615 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
19616 default: gcc_unreachable ();
19617 }
19618 }
19619 break;
19620
19621 default:
19622 gcc_unreachable ();
19623 }
19624
19625 gcc_assert (ftype != NULL);
19626
19627 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
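/* For instance, the vadd entry with mode T_V8QI should come out here
   as "__builtin_neon_vaddv8qi".  */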
19628
19629 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
19630 NULL_TREE);
19631 arm_builtin_decls[fcode] = decl;
19632 }
19633 }
19634
19635 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
19636 do \
19637 { \
19638 if ((MASK) & insn_flags) \
19639 { \
19640 tree bdecl; \
19641 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
19642 BUILT_IN_MD, NULL, NULL_TREE); \
19643 arm_builtin_decls[CODE] = bdecl; \
19644 } \
19645 } \
19646 while (0)
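/* A typical expansion of the macro above, sketched from the iWMMXt
   tables below: for the "waddb" entry the loop in
   arm_init_iwmmxt_builtins ends up doing roughly

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_waddb",
                   v8qi_ftype_v8qi_v8qi, ARM_BUILTIN_WADDB);

   which registers the builtin only when FL_IWMMXT is present in
   insn_flags.  */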
19647
19648 struct builtin_description
19649 {
19650 const unsigned int mask;
19651 const enum insn_code icode;
19652 const char * const name;
19653 const enum arm_builtins code;
19654 const enum rtx_code comparison;
19655 const unsigned int flag;
19656 };
19657
19658 static const struct builtin_description bdesc_2arg[] =
19659 {
19660 #define IWMMXT_BUILTIN(code, string, builtin) \
19661 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
19662 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19663
19664 #define IWMMXT2_BUILTIN(code, string, builtin) \
19665 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
19666 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
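/* As an illustration, IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) below
   expands to roughly
   { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
     ARM_BUILTIN_WADDB, UNKNOWN, 0 },
   i.e. one builtin_description entry gated on the iWMMXt insn flag.  */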
19667
19668 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
19669 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
19670 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
19671 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
19672 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
19673 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
19674 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
19675 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
19676 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
19677 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
19678 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
19679 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
19680 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
19681 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
19682 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
19683 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
19684 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
19685 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
19686 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
19687 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
19688 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
19689 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
19690 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
19691 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
19692 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
19693 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
19694 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
19695 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
19696 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
19697 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
19698 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
19699 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
19700 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
19701 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
19702 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
19703 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
19704 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
19705 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
19706 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
19707 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
19708 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
19709 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
19710 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
19711 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
19712 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
19713 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
19714 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
19715 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
19716 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
19717 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
19718 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
19719 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
19720 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
19721 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
19722 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
19723 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
19724 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
19725 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
19726 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
19727 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
19728 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
19729 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
19730 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
19731 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
19732 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
19733 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
19734 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
19735 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
19736 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
19737 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
19738 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
19739 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
19740 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
19741 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
19742 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
19743 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
19744 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
19745 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
19746
19747 #define IWMMXT_BUILTIN2(code, builtin) \
19748 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19749
19750 #define IWMMXT2_BUILTIN2(code, builtin) \
19751 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19752
19753 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
19754 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
19755 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
19756 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
19757 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
19758 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
19759 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
19760 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
19761 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
19762 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
19763 };
19764
19765 static const struct builtin_description bdesc_1arg[] =
19766 {
19767 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
19768 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
19769 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
19770 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
19771 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
19772 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
19773 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
19774 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
19775 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
19776 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
19777 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
19778 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
19779 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
19780 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
19781 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
19782 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
19783 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
19784 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
19785 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
19786 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
19787 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
19788 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
19789 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
19790 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
19791 };
19792
19793 /* Set up all the iWMMXt builtins. This is not called if
19794 TARGET_IWMMXT is zero. */
19795
19796 static void
19797 arm_init_iwmmxt_builtins (void)
19798 {
19799 const struct builtin_description * d;
19800 size_t i;
19801
19802 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
19803 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
19804 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
19805
19806 tree v8qi_ftype_v8qi_v8qi_int
19807 = build_function_type_list (V8QI_type_node,
19808 V8QI_type_node, V8QI_type_node,
19809 integer_type_node, NULL_TREE);
19810 tree v4hi_ftype_v4hi_int
19811 = build_function_type_list (V4HI_type_node,
19812 V4HI_type_node, integer_type_node, NULL_TREE);
19813 tree v2si_ftype_v2si_int
19814 = build_function_type_list (V2SI_type_node,
19815 V2SI_type_node, integer_type_node, NULL_TREE);
19816 tree v2si_ftype_di_di
19817 = build_function_type_list (V2SI_type_node,
19818 long_long_integer_type_node,
19819 long_long_integer_type_node,
19820 NULL_TREE);
19821 tree di_ftype_di_int
19822 = build_function_type_list (long_long_integer_type_node,
19823 long_long_integer_type_node,
19824 integer_type_node, NULL_TREE);
19825 tree di_ftype_di_int_int
19826 = build_function_type_list (long_long_integer_type_node,
19827 long_long_integer_type_node,
19828 integer_type_node,
19829 integer_type_node, NULL_TREE);
19830 tree int_ftype_v8qi
19831 = build_function_type_list (integer_type_node,
19832 V8QI_type_node, NULL_TREE);
19833 tree int_ftype_v4hi
19834 = build_function_type_list (integer_type_node,
19835 V4HI_type_node, NULL_TREE);
19836 tree int_ftype_v2si
19837 = build_function_type_list (integer_type_node,
19838 V2SI_type_node, NULL_TREE);
19839 tree int_ftype_v8qi_int
19840 = build_function_type_list (integer_type_node,
19841 V8QI_type_node, integer_type_node, NULL_TREE);
19842 tree int_ftype_v4hi_int
19843 = build_function_type_list (integer_type_node,
19844 V4HI_type_node, integer_type_node, NULL_TREE);
19845 tree int_ftype_v2si_int
19846 = build_function_type_list (integer_type_node,
19847 V2SI_type_node, integer_type_node, NULL_TREE);
19848 tree v8qi_ftype_v8qi_int_int
19849 = build_function_type_list (V8QI_type_node,
19850 V8QI_type_node, integer_type_node,
19851 integer_type_node, NULL_TREE);
19852 tree v4hi_ftype_v4hi_int_int
19853 = build_function_type_list (V4HI_type_node,
19854 V4HI_type_node, integer_type_node,
19855 integer_type_node, NULL_TREE);
19856 tree v2si_ftype_v2si_int_int
19857 = build_function_type_list (V2SI_type_node,
19858 V2SI_type_node, integer_type_node,
19859 integer_type_node, NULL_TREE);
19860 /* Miscellaneous. */
19861 tree v8qi_ftype_v4hi_v4hi
19862 = build_function_type_list (V8QI_type_node,
19863 V4HI_type_node, V4HI_type_node, NULL_TREE);
19864 tree v4hi_ftype_v2si_v2si
19865 = build_function_type_list (V4HI_type_node,
19866 V2SI_type_node, V2SI_type_node, NULL_TREE);
19867 tree v8qi_ftype_v4hi_v8qi
19868 = build_function_type_list (V8QI_type_node,
19869 V4HI_type_node, V8QI_type_node, NULL_TREE);
19870 tree v2si_ftype_v4hi_v4hi
19871 = build_function_type_list (V2SI_type_node,
19872 V4HI_type_node, V4HI_type_node, NULL_TREE);
19873 tree v2si_ftype_v8qi_v8qi
19874 = build_function_type_list (V2SI_type_node,
19875 V8QI_type_node, V8QI_type_node, NULL_TREE);
19876 tree v4hi_ftype_v4hi_di
19877 = build_function_type_list (V4HI_type_node,
19878 V4HI_type_node, long_long_integer_type_node,
19879 NULL_TREE);
19880 tree v2si_ftype_v2si_di
19881 = build_function_type_list (V2SI_type_node,
19882 V2SI_type_node, long_long_integer_type_node,
19883 NULL_TREE);
19884 tree di_ftype_void
19885 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
19886 tree int_ftype_void
19887 = build_function_type_list (integer_type_node, NULL_TREE);
19888 tree di_ftype_v8qi
19889 = build_function_type_list (long_long_integer_type_node,
19890 V8QI_type_node, NULL_TREE);
19891 tree di_ftype_v4hi
19892 = build_function_type_list (long_long_integer_type_node,
19893 V4HI_type_node, NULL_TREE);
19894 tree di_ftype_v2si
19895 = build_function_type_list (long_long_integer_type_node,
19896 V2SI_type_node, NULL_TREE);
19897 tree v2si_ftype_v4hi
19898 = build_function_type_list (V2SI_type_node,
19899 V4HI_type_node, NULL_TREE);
19900 tree v4hi_ftype_v8qi
19901 = build_function_type_list (V4HI_type_node,
19902 V8QI_type_node, NULL_TREE);
19903 tree v8qi_ftype_v8qi
19904 = build_function_type_list (V8QI_type_node,
19905 V8QI_type_node, NULL_TREE);
19906 tree v4hi_ftype_v4hi
19907 = build_function_type_list (V4HI_type_node,
19908 V4HI_type_node, NULL_TREE);
19909 tree v2si_ftype_v2si
19910 = build_function_type_list (V2SI_type_node,
19911 V2SI_type_node, NULL_TREE);
19912
19913 tree di_ftype_di_v4hi_v4hi
19914 = build_function_type_list (long_long_unsigned_type_node,
19915 long_long_unsigned_type_node,
19916 V4HI_type_node, V4HI_type_node,
19917 NULL_TREE);
19918
19919 tree di_ftype_v4hi_v4hi
19920 = build_function_type_list (long_long_unsigned_type_node,
19921 V4HI_type_node,V4HI_type_node,
19922 NULL_TREE);
19923
19924 tree v2si_ftype_v2si_v4hi_v4hi
19925 = build_function_type_list (V2SI_type_node,
19926 V2SI_type_node, V4HI_type_node,
19927 V4HI_type_node, NULL_TREE);
19928
19929 tree v2si_ftype_v2si_v8qi_v8qi
19930 = build_function_type_list (V2SI_type_node,
19931 V2SI_type_node, V8QI_type_node,
19932 V8QI_type_node, NULL_TREE);
19933
19934 tree di_ftype_di_v2si_v2si
19935 = build_function_type_list (long_long_unsigned_type_node,
19936 long_long_unsigned_type_node,
19937 V2SI_type_node, V2SI_type_node,
19938 NULL_TREE);
19939
19940 tree di_ftype_di_di_int
19941 = build_function_type_list (long_long_unsigned_type_node,
19942 long_long_unsigned_type_node,
19943 long_long_unsigned_type_node,
19944 integer_type_node, NULL_TREE);
19945
19946 tree void_ftype_int
19947 = build_function_type_list (void_type_node,
19948 integer_type_node, NULL_TREE);
19949
19950 tree v8qi_ftype_char
19951 = build_function_type_list (V8QI_type_node,
19952 signed_char_type_node, NULL_TREE);
19953
19954 tree v4hi_ftype_short
19955 = build_function_type_list (V4HI_type_node,
19956 short_integer_type_node, NULL_TREE);
19957
19958 tree v2si_ftype_int
19959 = build_function_type_list (V2SI_type_node,
19960 integer_type_node, NULL_TREE);
19961
19962 /* Normal vector binops. */
19963 tree v8qi_ftype_v8qi_v8qi
19964 = build_function_type_list (V8QI_type_node,
19965 V8QI_type_node, V8QI_type_node, NULL_TREE);
19966 tree v4hi_ftype_v4hi_v4hi
19967 = build_function_type_list (V4HI_type_node,
19968 V4HI_type_node,V4HI_type_node, NULL_TREE);
19969 tree v2si_ftype_v2si_v2si
19970 = build_function_type_list (V2SI_type_node,
19971 V2SI_type_node, V2SI_type_node, NULL_TREE);
19972 tree di_ftype_di_di
19973 = build_function_type_list (long_long_unsigned_type_node,
19974 long_long_unsigned_type_node,
19975 long_long_unsigned_type_node,
19976 NULL_TREE);
19977
19978 /* Add all builtins that are more or less simple operations on two
19979 operands. */
19980 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19981 {
19982 /* Use one of the operands; the target can have a different mode for
19983 mask-generating compares. */
19984 enum machine_mode mode;
19985 tree type;
19986
19987 if (d->name == 0)
19988 continue;
19989
19990 mode = insn_data[d->icode].operand[1].mode;
19991
19992 switch (mode)
19993 {
19994 case V8QImode:
19995 type = v8qi_ftype_v8qi_v8qi;
19996 break;
19997 case V4HImode:
19998 type = v4hi_ftype_v4hi_v4hi;
19999 break;
20000 case V2SImode:
20001 type = v2si_ftype_v2si_v2si;
20002 break;
20003 case DImode:
20004 type = di_ftype_di_di;
20005 break;
20006
20007 default:
20008 gcc_unreachable ();
20009 }
20010
20011 def_mbuiltin (d->mask, d->name, type, d->code);
20012 }
20013
20014 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
20015 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
20016 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
20017 ARM_BUILTIN_ ## CODE)
20018
20019 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
20020 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
20021 ARM_BUILTIN_ ## CODE)
20022
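 /* For instance, the first registration below expands (roughly) to
    def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
    ARM_BUILTIN_WZERO), so each intrinsic is only created when the
    corresponding iWMMXt feature flag is present.  */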
20023 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
20024 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
20025 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
20026 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
20027 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
20028 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
20029 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
20030 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
20031 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
20032
20033 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
20034 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
20035 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
20036 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
20037 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
20038 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
20039
20040 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
20041 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
20042 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
20043 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
20044 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
20045 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
20046
20047 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
20048 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
20049 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
20050 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
20051 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
20052 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
20053
20054 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
20055 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
20056 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
20057 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
20058 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
20059 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
20060
20061 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
20062
20063 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
20064 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
20065 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
20066 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
20067 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
20068 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
20069 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
20070 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
20071 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
20072 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
20073
20074 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
20075 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
20076 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
20077 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
20078 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
20079 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
20080 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
20081 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
20082 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
20083
20084 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
20085 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
20086 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
20087
20088 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
20089 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
20090 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
20091
20092 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
20093 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
20094
20095 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
20096 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
20097 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
20098 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
20099 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
20100 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
20101
20102 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
20103 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
20104 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
20105 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
20106 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
20107 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
20108 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
20109 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
20110 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
20111 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
20112 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
20113 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
20114
20115 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
20116 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
20117 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
20118 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
20119
20120 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
20121 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
20122 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
20123 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
20124 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
20125 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
20126 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
20127
20128 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
20129 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
20130 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
20131
20132 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
20133 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
20134 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
20135 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
20136
20137 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
20138 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
20139 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
20140 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
20141
20142 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
20143 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
20144 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
20145 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
20146
20147 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
20148 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
20149 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
20150 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
20151
20152 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
20153 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
20154 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
20155 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
20156
20157 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
20158 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
20159 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
20160 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
20161
20162 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
20163
20164 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
20165 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
20166 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
20167
20168 #undef iwmmx_mbuiltin
20169 #undef iwmmx2_mbuiltin
20170 }
20171
20172 static void
20173 arm_init_tls_builtins (void)
20174 {
20175 tree ftype, decl;
20176
20177 ftype = build_function_type (ptr_type_node, void_list_node);
20178 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
20179 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
20180 NULL, NULL_TREE);
20181 TREE_NOTHROW (decl) = 1;
20182 TREE_READONLY (decl) = 1;
20183 arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
20184 }
20185
20186 static void
20187 arm_init_fp16_builtins (void)
20188 {
20189 tree fp16_type = make_node (REAL_TYPE);
20190 TYPE_PRECISION (fp16_type) = 16;
20191 layout_type (fp16_type);
20192 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
20193 }
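 /* Illustrative usage note (not part of the original code): once the
    __fp16 type has been registered here, and provided a half-precision
    format has been selected (e.g. -mfp16-format=ieee), source such as

        __fp16 scale (__fp16 x) { return x + x; }

    is accepted; the __fp16 operands are promoted to float for the
    arithmetic (see arm_promoted_type below) and converted back on
    return.  */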
20194
20195 static void
20196 arm_init_builtins (void)
20197 {
20198 arm_init_tls_builtins ();
20199
20200 if (TARGET_REALLY_IWMMXT)
20201 arm_init_iwmmxt_builtins ();
20202
20203 if (TARGET_NEON)
20204 arm_init_neon_builtins ();
20205
20206 if (arm_fp16_format)
20207 arm_init_fp16_builtins ();
20208 }
20209
20210 /* Return the ARM builtin for CODE. */
20211
20212 static tree
20213 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
20214 {
20215 if (code >= ARM_BUILTIN_MAX)
20216 return error_mark_node;
20217
20218 return arm_builtin_decls[code];
20219 }
20220
20221 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
20222
20223 static const char *
20224 arm_invalid_parameter_type (const_tree t)
20225 {
20226 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20227 return N_("function parameters cannot have __fp16 type");
20228 return NULL;
20229 }
20230
20231 /* Implement TARGET_INVALID_RETURN_TYPE. */
20232
20233 static const char *
20234 arm_invalid_return_type (const_tree t)
20235 {
20236 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20237 return N_("functions cannot return __fp16 type");
20238 return NULL;
20239 }
20240
20241 /* Implement TARGET_PROMOTED_TYPE. */
20242
20243 static tree
20244 arm_promoted_type (const_tree t)
20245 {
20246 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20247 return float_type_node;
20248 return NULL_TREE;
20249 }
20250
20251 /* Implement TARGET_CONVERT_TO_TYPE.
20252 Specifically, this hook implements the peculiarity of the ARM
20253 half-precision floating-point C semantics that requires conversions
20254 between __fp16 and double to go through an intermediate conversion to float. */
20255
20256 static tree
20257 arm_convert_to_type (tree type, tree expr)
20258 {
20259 tree fromtype = TREE_TYPE (expr);
20260 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
20261 return NULL_TREE;
20262 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
20263 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
20264 return convert (type, convert (float_type_node, expr));
20265 return NULL_TREE;
20266 }
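 /* Worked example (a sketch of the effect, not generated code): for an
    __fp16 value h, a conversion written as (double) h is rewritten by
    this hook as (double) (float) h, and (__fp16) d for a double d is
    likewise routed through float, since these semantics provide no
    direct __fp16 <-> double conversion.  */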
20267
20268 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
20269 This simply adds HFmode as a supported mode; even though we don't
20270 implement arithmetic on this type directly, it's supported by
20271 optabs conversions, much the way the double-word arithmetic is
20272 special-cased in the default hook. */
20273
20274 static bool
20275 arm_scalar_mode_supported_p (enum machine_mode mode)
20276 {
20277 if (mode == HFmode)
20278 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
20279 else if (ALL_FIXED_POINT_MODE_P (mode))
20280 return true;
20281 else
20282 return default_scalar_mode_supported_p (mode);
20283 }
20284
20285 /* Errors in the source file can cause expand_expr to return const0_rtx
20286 where we expect a vector. To avoid crashing, use one of the vector
20287 clear instructions. */
20288
20289 static rtx
20290 safe_vector_operand (rtx x, enum machine_mode mode)
20291 {
20292 if (x != const0_rtx)
20293 return x;
20294 x = gen_reg_rtx (mode);
20295
20296 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
20297 : gen_rtx_SUBREG (DImode, x, 0)));
20298 return x;
20299 }
20300
20301 /* Subroutine of arm_expand_builtin to take care of binop insns. */
20302
20303 static rtx
20304 arm_expand_binop_builtin (enum insn_code icode,
20305 tree exp, rtx target)
20306 {
20307 rtx pat;
20308 tree arg0 = CALL_EXPR_ARG (exp, 0);
20309 tree arg1 = CALL_EXPR_ARG (exp, 1);
20310 rtx op0 = expand_normal (arg0);
20311 rtx op1 = expand_normal (arg1);
20312 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20313 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20314 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20315
20316 if (VECTOR_MODE_P (mode0))
20317 op0 = safe_vector_operand (op0, mode0);
20318 if (VECTOR_MODE_P (mode1))
20319 op1 = safe_vector_operand (op1, mode1);
20320
20321 if (! target
20322 || GET_MODE (target) != tmode
20323 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20324 target = gen_reg_rtx (tmode);
20325
20326 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
20327 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
20328
20329 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20330 op0 = copy_to_mode_reg (mode0, op0);
20331 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20332 op1 = copy_to_mode_reg (mode1, op1);
20333
20334 pat = GEN_FCN (icode) (target, op0, op1);
20335 if (! pat)
20336 return 0;
20337 emit_insn (pat);
20338 return target;
20339 }
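 /* Illustrative flow (a sketch): for a simple two-operand iWMMXt builtin
    looked up in bdesc_2arg at the end of arm_expand_builtin, this helper
    receives the CALL_EXPR, forces each argument into a register of the
    mode the insn pattern expects, allocates a suitable target register
    if the caller's TARGET does not fit, and emits the single insn
    returned by GEN_FCN (icode).  */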
20340
20341 /* Subroutine of arm_expand_builtin to take care of unop insns. */
20342
20343 static rtx
20344 arm_expand_unop_builtin (enum insn_code icode,
20345 tree exp, rtx target, int do_load)
20346 {
20347 rtx pat;
20348 tree arg0 = CALL_EXPR_ARG (exp, 0);
20349 rtx op0 = expand_normal (arg0);
20350 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20351 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20352
20353 if (! target
20354 || GET_MODE (target) != tmode
20355 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20356 target = gen_reg_rtx (tmode);
20357 if (do_load)
20358 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20359 else
20360 {
20361 if (VECTOR_MODE_P (mode0))
20362 op0 = safe_vector_operand (op0, mode0);
20363
20364 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20365 op0 = copy_to_mode_reg (mode0, op0);
20366 }
20367
20368 pat = GEN_FCN (icode) (target, op0);
20369 if (! pat)
20370 return 0;
20371 emit_insn (pat);
20372 return target;
20373 }
20374
20375 typedef enum {
20376 NEON_ARG_COPY_TO_REG,
20377 NEON_ARG_CONSTANT,
20378 NEON_ARG_MEMORY,
20379 NEON_ARG_STOP
20380 } builtin_arg;
20381
20382 #define NEON_MAX_BUILTIN_ARGS 5
20383
20384 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
20385 and return an expression for the accessed memory.
20386
20387 The intrinsic function operates on a block of registers that has
20388 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
20389 function references the memory at EXP of type TYPE and in mode
20390 MEM_MODE; this mode may be BLKmode if no more suitable mode is
20391 available. */
20392
20393 static tree
20394 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
20395 enum machine_mode reg_mode,
20396 neon_builtin_type_mode type_mode)
20397 {
20398 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
20399 tree elem_type, upper_bound, array_type;
20400
20401 /* Work out the size of the register block in bytes. */
20402 reg_size = GET_MODE_SIZE (reg_mode);
20403
20404 /* Work out the size of each vector in bytes. */
20405 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
20406 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
20407
20408 /* Work out how many vectors there are. */
20409 gcc_assert (reg_size % vector_size == 0);
20410 nvectors = reg_size / vector_size;
20411
20412 /* Work out the type of each element. */
20413 gcc_assert (POINTER_TYPE_P (type));
20414 elem_type = TREE_TYPE (type);
20415
20416 /* Work out how many elements are being loaded or stored.
20417 MEM_MODE == REG_MODE implies a one-to-one mapping between register
20418 and memory elements; anything else implies a lane load or store. */
20419 if (mem_mode == reg_mode)
20420 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
20421 else
20422 nelems = nvectors;
20423
20424 /* Create a type that describes the full access. */
20425 upper_bound = build_int_cst (size_type_node, nelems - 1);
20426 array_type = build_array_type (elem_type, build_index_type (upper_bound));
20427
20428 /* Dereference EXP using that type. */
20429 return fold_build2 (MEM_REF, array_type, exp,
20430 build_int_cst (build_pointer_type (array_type), 0));
20431 }
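 /* Worked example (illustrative): for a full 128-bit load through an
    int32_t pointer, REG_SIZE is 16 and VECTOR_SIZE is 16, so NVECTORS
    is 1; since MEM_MODE == REG_MODE, NELEMS is 16 / 4 = 4 and the access
    is described as an int32_t[4] array at EXP.  For a lane load or store
    the modes differ and only NVECTORS elements are accessed.  */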
20432
20433 /* Expand a Neon builtin. */
20434 static rtx
20435 arm_expand_neon_args (rtx target, int icode, int have_retval,
20436 neon_builtin_type_mode type_mode,
20437 tree exp, int fcode, ...)
20438 {
20439 va_list ap;
20440 rtx pat;
20441 tree arg[NEON_MAX_BUILTIN_ARGS];
20442 rtx op[NEON_MAX_BUILTIN_ARGS];
20443 tree arg_type;
20444 tree formals;
20445 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20446 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
20447 enum machine_mode other_mode;
20448 int argc = 0;
20449 int opno;
20450
20451 if (have_retval
20452 && (!target
20453 || GET_MODE (target) != tmode
20454 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
20455 target = gen_reg_rtx (tmode);
20456
20457 va_start (ap, fcode);
20458
20459 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
20460
20461 for (;;)
20462 {
20463 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
20464
20465 if (thisarg == NEON_ARG_STOP)
20466 break;
20467 else
20468 {
20469 opno = argc + have_retval;
20470 mode[argc] = insn_data[icode].operand[opno].mode;
20471 arg[argc] = CALL_EXPR_ARG (exp, argc);
20472 arg_type = TREE_VALUE (formals);
20473 if (thisarg == NEON_ARG_MEMORY)
20474 {
20475 other_mode = insn_data[icode].operand[1 - opno].mode;
20476 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
20477 mode[argc], other_mode,
20478 type_mode);
20479 }
20480
20481 op[argc] = expand_normal (arg[argc]);
20482
20483 switch (thisarg)
20484 {
20485 case NEON_ARG_COPY_TO_REG:
20486 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
20487 if (!(*insn_data[icode].operand[opno].predicate)
20488 (op[argc], mode[argc]))
20489 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
20490 break;
20491
20492 case NEON_ARG_CONSTANT:
20493 /* FIXME: This error message is somewhat unhelpful. */
20494 if (!(*insn_data[icode].operand[opno].predicate)
20495 (op[argc], mode[argc]))
20496 error ("argument must be a constant");
20497 break;
20498
20499 case NEON_ARG_MEMORY:
20500 gcc_assert (MEM_P (op[argc]));
20501 PUT_MODE (op[argc], mode[argc]);
20502 /* ??? arm_neon.h uses the same built-in functions for signed
20503 and unsigned accesses, casting where necessary. This isn't
20504 alias safe. */
20505 set_mem_alias_set (op[argc], 0);
20506 if (!(*insn_data[icode].operand[opno].predicate)
20507 (op[argc], mode[argc]))
20508 op[argc] = (replace_equiv_address
20509 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
20510 break;
20511
20512 case NEON_ARG_STOP:
20513 gcc_unreachable ();
20514 }
20515
20516 argc++;
20517 formals = TREE_CHAIN (formals);
20518 }
20519 }
20520
20521 va_end (ap);
20522
20523 if (have_retval)
20524 switch (argc)
20525 {
20526 case 1:
20527 pat = GEN_FCN (icode) (target, op[0]);
20528 break;
20529
20530 case 2:
20531 pat = GEN_FCN (icode) (target, op[0], op[1]);
20532 break;
20533
20534 case 3:
20535 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
20536 break;
20537
20538 case 4:
20539 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
20540 break;
20541
20542 case 5:
20543 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
20544 break;
20545
20546 default:
20547 gcc_unreachable ();
20548 }
20549 else
20550 switch (argc)
20551 {
20552 case 1:
20553 pat = GEN_FCN (icode) (op[0]);
20554 break;
20555
20556 case 2:
20557 pat = GEN_FCN (icode) (op[0], op[1]);
20558 break;
20559
20560 case 3:
20561 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
20562 break;
20563
20564 case 4:
20565 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
20566 break;
20567
20568 case 5:
20569 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
20570 break;
20571
20572 default:
20573 gcc_unreachable ();
20574 }
20575
20576 if (!pat)
20577 return 0;
20578
20579 emit_insn (pat);
20580
20581 return target;
20582 }
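 /* Example invocation (see the NEON_BINOP case below): a two-operand
    Neon builtin is expanded as

      arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
                            NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
                            NEON_ARG_CONSTANT, NEON_ARG_STOP);

    which copies both vector operands into registers, checks the trailing
    magic-constant operand against its predicate, and emits the insn.  */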
20583
20584 /* Expand a Neon builtin. These are "special" because they don't have symbolic
20585 constants defined per-instruction or per instruction-variant. Instead, the
20586 required info is looked up in the table neon_builtin_data. */
20587 static rtx
20588 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
20589 {
20590 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
20591 neon_itype itype = d->itype;
20592 enum insn_code icode = d->code;
20593 neon_builtin_type_mode type_mode = d->mode;
20594
20595 switch (itype)
20596 {
20597 case NEON_UNOP:
20598 case NEON_CONVERT:
20599 case NEON_DUPLANE:
20600 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20601 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20602
20603 case NEON_BINOP:
20604 case NEON_SETLANE:
20605 case NEON_SCALARMUL:
20606 case NEON_SCALARMULL:
20607 case NEON_SCALARMULH:
20608 case NEON_SHIFTINSERT:
20609 case NEON_LOGICBINOP:
20610 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20611 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20612 NEON_ARG_STOP);
20613
20614 case NEON_TERNOP:
20615 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20616 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20617 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20618
20619 case NEON_GETLANE:
20620 case NEON_FIXCONV:
20621 case NEON_SHIFTIMM:
20622 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20623 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
20624 NEON_ARG_STOP);
20625
20626 case NEON_CREATE:
20627 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20628 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20629
20630 case NEON_DUP:
20631 case NEON_SPLIT:
20632 case NEON_REINTERP:
20633 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20634 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20635
20636 case NEON_COMBINE:
20637 case NEON_VTBL:
20638 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20639 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20640
20641 case NEON_RESULTPAIR:
20642 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
20643 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20644 NEON_ARG_STOP);
20645
20646 case NEON_LANEMUL:
20647 case NEON_LANEMULL:
20648 case NEON_LANEMULH:
20649 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20650 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20651 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20652
20653 case NEON_LANEMAC:
20654 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20655 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20656 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20657
20658 case NEON_SHIFTACC:
20659 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20660 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20661 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20662
20663 case NEON_SCALARMAC:
20664 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20665 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20666 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20667
20668 case NEON_SELECT:
20669 case NEON_VTBX:
20670 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20671 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20672 NEON_ARG_STOP);
20673
20674 case NEON_LOAD1:
20675 case NEON_LOADSTRUCT:
20676 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20677 NEON_ARG_MEMORY, NEON_ARG_STOP);
20678
20679 case NEON_LOAD1LANE:
20680 case NEON_LOADSTRUCTLANE:
20681 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20682 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20683 NEON_ARG_STOP);
20684
20685 case NEON_STORE1:
20686 case NEON_STORESTRUCT:
20687 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
20688 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20689
20690 case NEON_STORE1LANE:
20691 case NEON_STORESTRUCTLANE:
20692 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
20693 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20694 NEON_ARG_STOP);
20695 }
20696
20697 gcc_unreachable ();
20698 }
20699
20700 /* Emit code to reinterpret one Neon type as another, without altering bits. */
20701 void
20702 neon_reinterpret (rtx dest, rtx src)
20703 {
20704 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
20705 }
20706
20707 /* Emit code to place a Neon pair result in memory locations (with equal
20708 registers). */
20709 void
20710 neon_emit_pair_result_insn (enum machine_mode mode,
20711 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
20712 rtx op1, rtx op2)
20713 {
20714 rtx mem = gen_rtx_MEM (mode, destaddr);
20715 rtx tmp1 = gen_reg_rtx (mode);
20716 rtx tmp2 = gen_reg_rtx (mode);
20717
20718 emit_insn (intfn (tmp1, op1, op2, tmp2));
20719
20720 emit_move_insn (mem, tmp1);
20721 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
20722 emit_move_insn (mem, tmp2);
20723 }
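 /* Layout note (illustrative): the two halves of the pair are stored at
    consecutive MODE-sized slots, so with a V4SImode result the second
    store lands at DESTADDR + 16, i.e. GET_MODE_SIZE (V4SImode) bytes on.  */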
20724
20725 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
20726 not to early-clobber SRC registers in the process.
20727
20728 We assume that the operands described by SRC and DEST represent a
20729 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
20730 number of components into which the copy has been decomposed. */
20731 void
20732 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
20733 {
20734 unsigned int i;
20735
20736 if (!reg_overlap_mentioned_p (operands[0], operands[1])
20737 || REGNO (operands[0]) < REGNO (operands[1]))
20738 {
20739 for (i = 0; i < count; i++)
20740 {
20741 operands[2 * i] = dest[i];
20742 operands[2 * i + 1] = src[i];
20743 }
20744 }
20745 else
20746 {
20747 for (i = 0; i < count; i++)
20748 {
20749 operands[2 * i] = dest[count - i - 1];
20750 operands[2 * i + 1] = src[count - i - 1];
20751 }
20752 }
20753 }
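 /* Worked example (illustrative): copying a two-register value from
    {d0, d1} into {d1, d2} overlaps and the destination starts at the
    higher register number, so the loop above emits the component moves
    in reverse order (d2 <- d1 first, then d1 <- d0); the forward order
    would clobber d1 before it had been read.  */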
20754
20755 /* Split operands into moves from op[1] + op[2] into op[0]. */
20756
20757 void
20758 neon_split_vcombine (rtx operands[3])
20759 {
20760 unsigned int dest = REGNO (operands[0]);
20761 unsigned int src1 = REGNO (operands[1]);
20762 unsigned int src2 = REGNO (operands[2]);
20763 enum machine_mode halfmode = GET_MODE (operands[1]);
20764 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
20765 rtx destlo, desthi;
20766
20767 if (src1 == dest && src2 == dest + halfregs)
20768 {
20769 /* No-op move. Can't split to nothing; emit something. */
20770 emit_note (NOTE_INSN_DELETED);
20771 return;
20772 }
20773
20774 /* Preserve register attributes for variable tracking. */
20775 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
20776 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
20777 GET_MODE_SIZE (halfmode));
20778
20779 /* Special case of reversed high/low parts. Use VSWP. */
20780 if (src2 == dest && src1 == dest + halfregs)
20781 {
20782 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
20783 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
20784 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
20785 return;
20786 }
20787
20788 if (!reg_overlap_mentioned_p (operands[2], destlo))
20789 {
20790 /* Try to avoid unnecessary moves if part of the result
20791 is in the right place already. */
20792 if (src1 != dest)
20793 emit_move_insn (destlo, operands[1]);
20794 if (src2 != dest + halfregs)
20795 emit_move_insn (desthi, operands[2]);
20796 }
20797 else
20798 {
20799 if (src2 != dest + halfregs)
20800 emit_move_insn (desthi, operands[2]);
20801 if (src1 != dest)
20802 emit_move_insn (destlo, operands[1]);
20803 }
20804 }
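 /* Summary of the cases handled above (illustrative): if both halves of
    the destination already hold SRC1 and SRC2, the split emits only a
    deleted-insn note; if the halves are exactly swapped, a single VSWP
    parallel is used; otherwise the two half moves are ordered so that
    neither overwrites a source half that is still needed.  */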
20805
20806 /* Expand an expression EXP that calls a built-in function,
20807 with result going to TARGET if that's convenient
20808 (and in mode MODE if that's convenient).
20809 SUBTARGET may be used as the target for computing one of EXP's operands.
20810 IGNORE is nonzero if the value is to be ignored. */
20811
20812 static rtx
20813 arm_expand_builtin (tree exp,
20814 rtx target,
20815 rtx subtarget ATTRIBUTE_UNUSED,
20816 enum machine_mode mode ATTRIBUTE_UNUSED,
20817 int ignore ATTRIBUTE_UNUSED)
20818 {
20819 const struct builtin_description * d;
20820 enum insn_code icode;
20821 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20822 tree arg0;
20823 tree arg1;
20824 tree arg2;
20825 rtx op0;
20826 rtx op1;
20827 rtx op2;
20828 rtx pat;
20829 int fcode = DECL_FUNCTION_CODE (fndecl);
20830 size_t i;
20831 enum machine_mode tmode;
20832 enum machine_mode mode0;
20833 enum machine_mode mode1;
20834 enum machine_mode mode2;
20835 int opint;
20836 int selector;
20837 int mask;
20838 int imm;
20839
20840 if (fcode >= ARM_BUILTIN_NEON_BASE)
20841 return arm_expand_neon_builtin (fcode, exp, target);
20842
20843 switch (fcode)
20844 {
20845 case ARM_BUILTIN_TEXTRMSB:
20846 case ARM_BUILTIN_TEXTRMUB:
20847 case ARM_BUILTIN_TEXTRMSH:
20848 case ARM_BUILTIN_TEXTRMUH:
20849 case ARM_BUILTIN_TEXTRMSW:
20850 case ARM_BUILTIN_TEXTRMUW:
20851 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
20852 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
20853 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
20854 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
20855 : CODE_FOR_iwmmxt_textrmw);
20856
20857 arg0 = CALL_EXPR_ARG (exp, 0);
20858 arg1 = CALL_EXPR_ARG (exp, 1);
20859 op0 = expand_normal (arg0);
20860 op1 = expand_normal (arg1);
20861 tmode = insn_data[icode].operand[0].mode;
20862 mode0 = insn_data[icode].operand[1].mode;
20863 mode1 = insn_data[icode].operand[2].mode;
20864
20865 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20866 op0 = copy_to_mode_reg (mode0, op0);
20867 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20868 {
20869 /* @@@ better error message */
20870 error ("selector must be an immediate");
20871 return gen_reg_rtx (tmode);
20872 }
20873
20874 opint = INTVAL (op1);
20875 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
20876 {
20877 if (opint > 7 || opint < 0)
20878 error ("the range of selector should be in 0 to 7");
20879 }
20880 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
20881 {
20882 if (opint > 3 || opint < 0)
20883 error ("the range of selector should be in 0 to 3");
20884 }
20885 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
20886 {
20887 if (opint > 1 || opint < 0)
20888 error ("the range of selector should be in 0 to 1");
20889 }
20890
20891 if (target == 0
20892 || GET_MODE (target) != tmode
20893 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20894 target = gen_reg_rtx (tmode);
20895 pat = GEN_FCN (icode) (target, op0, op1);
20896 if (! pat)
20897 return 0;
20898 emit_insn (pat);
20899 return target;
20900
20901 case ARM_BUILTIN_WALIGNI:
20902 /* If op2 is an immediate, call waligni, else call walignr. */
20903 arg0 = CALL_EXPR_ARG (exp, 0);
20904 arg1 = CALL_EXPR_ARG (exp, 1);
20905 arg2 = CALL_EXPR_ARG (exp, 2);
20906 op0 = expand_normal (arg0);
20907 op1 = expand_normal (arg1);
20908 op2 = expand_normal (arg2);
20909 if (CONST_INT_P (op2))
20910 {
20911 icode = CODE_FOR_iwmmxt_waligni;
20912 tmode = insn_data[icode].operand[0].mode;
20913 mode0 = insn_data[icode].operand[1].mode;
20914 mode1 = insn_data[icode].operand[2].mode;
20915 mode2 = insn_data[icode].operand[3].mode;
20916 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
20917 op0 = copy_to_mode_reg (mode0, op0);
20918 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
20919 op1 = copy_to_mode_reg (mode1, op1);
20920 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
20921 selector = INTVAL (op2);
20922 if (selector > 7 || selector < 0)
20923 error ("the range of selector should be in 0 to 7");
20924 }
20925 else
20926 {
20927 icode = CODE_FOR_iwmmxt_walignr;
20928 tmode = insn_data[icode].operand[0].mode;
20929 mode0 = insn_data[icode].operand[1].mode;
20930 mode1 = insn_data[icode].operand[2].mode;
20931 mode2 = insn_data[icode].operand[3].mode;
20932 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
20933 op0 = copy_to_mode_reg (mode0, op0);
20934 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
20935 op1 = copy_to_mode_reg (mode1, op1);
20936 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
20937 op2 = copy_to_mode_reg (mode2, op2);
20938 }
20939 if (target == 0
20940 || GET_MODE (target) != tmode
20941 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
20942 target = gen_reg_rtx (tmode);
20943 pat = GEN_FCN (icode) (target, op0, op1, op2);
20944 if (!pat)
20945 return 0;
20946 emit_insn (pat);
20947 return target;
20948
20949 case ARM_BUILTIN_TINSRB:
20950 case ARM_BUILTIN_TINSRH:
20951 case ARM_BUILTIN_TINSRW:
20952 case ARM_BUILTIN_WMERGE:
20953 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
20954 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
20955 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
20956 : CODE_FOR_iwmmxt_tinsrw);
20957 arg0 = CALL_EXPR_ARG (exp, 0);
20958 arg1 = CALL_EXPR_ARG (exp, 1);
20959 arg2 = CALL_EXPR_ARG (exp, 2);
20960 op0 = expand_normal (arg0);
20961 op1 = expand_normal (arg1);
20962 op2 = expand_normal (arg2);
20963 tmode = insn_data[icode].operand[0].mode;
20964 mode0 = insn_data[icode].operand[1].mode;
20965 mode1 = insn_data[icode].operand[2].mode;
20966 mode2 = insn_data[icode].operand[3].mode;
20967
20968 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20969 op0 = copy_to_mode_reg (mode0, op0);
20970 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20971 op1 = copy_to_mode_reg (mode1, op1);
20972 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20973 {
20974 error ("selector must be an immediate");
20975 return const0_rtx;
20976 }
20977 if (icode == CODE_FOR_iwmmxt_wmerge)
20978 {
20979 selector = INTVAL (op2);
20980 if (selector > 7 || selector < 0)
20981 error ("the range of selector should be in 0 to 7");
20982 }
20983 if ((icode == CODE_FOR_iwmmxt_tinsrb)
20984 || (icode == CODE_FOR_iwmmxt_tinsrh)
20985 || (icode == CODE_FOR_iwmmxt_tinsrw))
20986 {
20987 mask = 0x01;
20988 selector = INTVAL (op2);
20989 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
20990 error ("the range of selector should be in 0 to 7");
20991 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
20992 error ("the range of selector should be in 0 to 3");
20993 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
20994 error ("the range of selector should be in 0 to 1");
20995 mask <<= selector;
20996 op2 = GEN_INT (mask);
20997 }
20998 if (target == 0
20999 || GET_MODE (target) != tmode
21000 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21001 target = gen_reg_rtx (tmode);
21002 pat = GEN_FCN (icode) (target, op0, op1, op2);
21003 if (! pat)
21004 return 0;
21005 emit_insn (pat);
21006 return target;
21007
21008 case ARM_BUILTIN_SETWCGR0:
21009 case ARM_BUILTIN_SETWCGR1:
21010 case ARM_BUILTIN_SETWCGR2:
21011 case ARM_BUILTIN_SETWCGR3:
21012 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
21013 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
21014 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
21015 : CODE_FOR_iwmmxt_setwcgr3);
21016 arg0 = CALL_EXPR_ARG (exp, 0);
21017 op0 = expand_normal (arg0);
21018 mode0 = insn_data[icode].operand[0].mode;
21019 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
21020 op0 = copy_to_mode_reg (mode0, op0);
21021 pat = GEN_FCN (icode) (op0);
21022 if (!pat)
21023 return 0;
21024 emit_insn (pat);
21025 return 0;
21026
21027 case ARM_BUILTIN_GETWCGR0:
21028 case ARM_BUILTIN_GETWCGR1:
21029 case ARM_BUILTIN_GETWCGR2:
21030 case ARM_BUILTIN_GETWCGR3:
21031 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
21032 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
21033 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
21034 : CODE_FOR_iwmmxt_getwcgr3);
21035 tmode = insn_data[icode].operand[0].mode;
21036 if (target == 0
21037 || GET_MODE (target) != tmode
21038 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
21039 target = gen_reg_rtx (tmode);
21040 pat = GEN_FCN (icode) (target);
21041 if (!pat)
21042 return 0;
21043 emit_insn (pat);
21044 return target;
21045
21046 case ARM_BUILTIN_WSHUFH:
21047 icode = CODE_FOR_iwmmxt_wshufh;
21048 arg0 = CALL_EXPR_ARG (exp, 0);
21049 arg1 = CALL_EXPR_ARG (exp, 1);
21050 op0 = expand_normal (arg0);
21051 op1 = expand_normal (arg1);
21052 tmode = insn_data[icode].operand[0].mode;
21053 mode1 = insn_data[icode].operand[1].mode;
21054 mode2 = insn_data[icode].operand[2].mode;
21055
21056 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21057 op0 = copy_to_mode_reg (mode1, op0);
21058 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21059 {
21060 error ("mask must be an immediate");
21061 return const0_rtx;
21062 }
21063 selector = INTVAL (op1);
21064 if (selector < 0 || selector > 255)
21065 error ("the range of mask should be in 0 to 255");
21066 if (target == 0
21067 || GET_MODE (target) != tmode
21068 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21069 target = gen_reg_rtx (tmode);
21070 pat = GEN_FCN (icode) (target, op0, op1);
21071 if (! pat)
21072 return 0;
21073 emit_insn (pat);
21074 return target;
21075
21076 case ARM_BUILTIN_WMADDS:
21077 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
21078 case ARM_BUILTIN_WMADDSX:
21079 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
21080 case ARM_BUILTIN_WMADDSN:
21081 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
21082 case ARM_BUILTIN_WMADDU:
21083 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
21084 case ARM_BUILTIN_WMADDUX:
21085 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
21086 case ARM_BUILTIN_WMADDUN:
21087 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
21088 case ARM_BUILTIN_WSADBZ:
21089 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
21090 case ARM_BUILTIN_WSADHZ:
21091 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
21092
21093 /* Several three-argument builtins. */
21094 case ARM_BUILTIN_WMACS:
21095 case ARM_BUILTIN_WMACU:
21096 case ARM_BUILTIN_TMIA:
21097 case ARM_BUILTIN_TMIAPH:
21098 case ARM_BUILTIN_TMIATT:
21099 case ARM_BUILTIN_TMIATB:
21100 case ARM_BUILTIN_TMIABT:
21101 case ARM_BUILTIN_TMIABB:
21102 case ARM_BUILTIN_WQMIABB:
21103 case ARM_BUILTIN_WQMIABT:
21104 case ARM_BUILTIN_WQMIATB:
21105 case ARM_BUILTIN_WQMIATT:
21106 case ARM_BUILTIN_WQMIABBN:
21107 case ARM_BUILTIN_WQMIABTN:
21108 case ARM_BUILTIN_WQMIATBN:
21109 case ARM_BUILTIN_WQMIATTN:
21110 case ARM_BUILTIN_WMIABB:
21111 case ARM_BUILTIN_WMIABT:
21112 case ARM_BUILTIN_WMIATB:
21113 case ARM_BUILTIN_WMIATT:
21114 case ARM_BUILTIN_WMIABBN:
21115 case ARM_BUILTIN_WMIABTN:
21116 case ARM_BUILTIN_WMIATBN:
21117 case ARM_BUILTIN_WMIATTN:
21118 case ARM_BUILTIN_WMIAWBB:
21119 case ARM_BUILTIN_WMIAWBT:
21120 case ARM_BUILTIN_WMIAWTB:
21121 case ARM_BUILTIN_WMIAWTT:
21122 case ARM_BUILTIN_WMIAWBBN:
21123 case ARM_BUILTIN_WMIAWBTN:
21124 case ARM_BUILTIN_WMIAWTBN:
21125 case ARM_BUILTIN_WMIAWTTN:
21126 case ARM_BUILTIN_WSADB:
21127 case ARM_BUILTIN_WSADH:
21128 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
21129 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
21130 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
21131 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
21132 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
21133 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
21134 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
21135 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
21136 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
21137 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
21138 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
21139 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
21140 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
21141 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
21142 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
21143 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
21144 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
21145 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
21146 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
21147 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
21148 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
21149 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
21150 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
21151 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
21152 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
21153 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
21154 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
21155 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
21156 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
21157 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
21158 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
21159 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
21160 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
21161 : CODE_FOR_iwmmxt_wsadh);
21162 arg0 = CALL_EXPR_ARG (exp, 0);
21163 arg1 = CALL_EXPR_ARG (exp, 1);
21164 arg2 = CALL_EXPR_ARG (exp, 2);
21165 op0 = expand_normal (arg0);
21166 op1 = expand_normal (arg1);
21167 op2 = expand_normal (arg2);
21168 tmode = insn_data[icode].operand[0].mode;
21169 mode0 = insn_data[icode].operand[1].mode;
21170 mode1 = insn_data[icode].operand[2].mode;
21171 mode2 = insn_data[icode].operand[3].mode;
21172
21173 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21174 op0 = copy_to_mode_reg (mode0, op0);
21175 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21176 op1 = copy_to_mode_reg (mode1, op1);
21177 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21178 op2 = copy_to_mode_reg (mode2, op2);
21179 if (target == 0
21180 || GET_MODE (target) != tmode
21181 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21182 target = gen_reg_rtx (tmode);
21183 pat = GEN_FCN (icode) (target, op0, op1, op2);
21184 if (! pat)
21185 return 0;
21186 emit_insn (pat);
21187 return target;
21188
21189 case ARM_BUILTIN_WZERO:
21190 target = gen_reg_rtx (DImode);
21191 emit_insn (gen_iwmmxt_clrdi (target));
21192 return target;
21193
21194 case ARM_BUILTIN_WSRLHI:
21195 case ARM_BUILTIN_WSRLWI:
21196 case ARM_BUILTIN_WSRLDI:
21197 case ARM_BUILTIN_WSLLHI:
21198 case ARM_BUILTIN_WSLLWI:
21199 case ARM_BUILTIN_WSLLDI:
21200 case ARM_BUILTIN_WSRAHI:
21201 case ARM_BUILTIN_WSRAWI:
21202 case ARM_BUILTIN_WSRADI:
21203 case ARM_BUILTIN_WRORHI:
21204 case ARM_BUILTIN_WRORWI:
21205 case ARM_BUILTIN_WRORDI:
21206 case ARM_BUILTIN_WSRLH:
21207 case ARM_BUILTIN_WSRLW:
21208 case ARM_BUILTIN_WSRLD:
21209 case ARM_BUILTIN_WSLLH:
21210 case ARM_BUILTIN_WSLLW:
21211 case ARM_BUILTIN_WSLLD:
21212 case ARM_BUILTIN_WSRAH:
21213 case ARM_BUILTIN_WSRAW:
21214 case ARM_BUILTIN_WSRAD:
21215 case ARM_BUILTIN_WRORH:
21216 case ARM_BUILTIN_WRORW:
21217 case ARM_BUILTIN_WRORD:
21218 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
21219 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
21220 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
21221 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
21222 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
21223 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
21224 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
21225 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
21226 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
21227 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
21228 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
21229 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
21230 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
21231 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
21232 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
21233 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
21234 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
21235 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
21236 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
21237 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
21238 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
21239 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
21240 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
21241 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
21242 : CODE_FOR_nothing);
21243 arg1 = CALL_EXPR_ARG (exp, 1);
21244 op1 = expand_normal (arg1);
21245 if (GET_MODE (op1) == VOIDmode)
21246 {
21247 imm = INTVAL (op1);
21248 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
21249 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
21250 && (imm < 0 || imm > 32))
21251 {
21252 if (fcode == ARM_BUILTIN_WRORHI)
21253 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
21254 else if (fcode == ARM_BUILTIN_WRORWI)
21255 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
21256 else if (fcode == ARM_BUILTIN_WRORH)
21257 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
21258 else
21259 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
21260 }
21261 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
21262 && (imm < 0 || imm > 64))
21263 {
21264 if (fcode == ARM_BUILTIN_WRORDI)
21265 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
21266 else
21267 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
21268 }
21269 else if (imm < 0)
21270 {
21271 if (fcode == ARM_BUILTIN_WSRLHI)
21272 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
21273 else if (fcode == ARM_BUILTIN_WSRLWI)
21274 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
21275 else if (fcode == ARM_BUILTIN_WSRLDI)
21276 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
21277 else if (fcode == ARM_BUILTIN_WSLLHI)
21278 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
21279 else if (fcode == ARM_BUILTIN_WSLLWI)
21280 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
21281 else if (fcode == ARM_BUILTIN_WSLLDI)
21282 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
21283 else if (fcode == ARM_BUILTIN_WSRAHI)
21284 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
21285 else if (fcode == ARM_BUILTIN_WSRAWI)
21286 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
21287 else if (fcode == ARM_BUILTIN_WSRADI)
21288 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
21289 else if (fcode == ARM_BUILTIN_WSRLH)
21290 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
21291 else if (fcode == ARM_BUILTIN_WSRLW)
21292 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
21293 else if (fcode == ARM_BUILTIN_WSRLD)
21294 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
21295 else if (fcode == ARM_BUILTIN_WSLLH)
21296 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
21297 else if (fcode == ARM_BUILTIN_WSLLW)
21298 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
21299 else if (fcode == ARM_BUILTIN_WSLLD)
21300 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
21301 else if (fcode == ARM_BUILTIN_WSRAH)
21302 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
21303 else if (fcode == ARM_BUILTIN_WSRAW)
21304 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
21305 else
21306 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
21307 }
21308 }
21309 return arm_expand_binop_builtin (icode, exp, target);
21310
21311 case ARM_BUILTIN_THREAD_POINTER:
21312 return arm_load_tp (target);
21313
21314 default:
21315 break;
21316 }
21317
21318 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21319 if (d->code == (const enum arm_builtins) fcode)
21320 return arm_expand_binop_builtin (d->icode, exp, target);
21321
21322 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21323 if (d->code == (const enum arm_builtins) fcode)
21324 return arm_expand_unop_builtin (d->icode, exp, target, 0);
21325
21326 /* @@@ Should really do something sensible here. */
21327 return NULL_RTX;
21328 }
21329 \f
21330 /* Return the number (counting from 0) of
21331 the least significant set bit in MASK. */
21332
21333 inline static int
21334 number_of_first_bit_set (unsigned mask)
21335 {
21336 return ctz_hwi (mask);
21337 }
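 /* For example, number_of_first_bit_set (0x14) is 2, since bit 2 is the
    lowest bit set in 0b10100.  */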
21338
21339 /* Like emit_multi_reg_push, but allowing for a different set of
21340 registers to be described as saved. MASK is the set of registers
21341 to be saved; REAL_REGS is the set of registers to be described as
21342 saved. If REAL_REGS is 0, only describe the stack adjustment. */
21343
21344 static rtx
21345 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
21346 {
21347 unsigned long regno;
21348 rtx par[10], tmp, reg, insn;
21349 int i, j;
21350
21351 /* Build the parallel of the registers actually being stored. */
21352 for (i = 0; mask; ++i, mask &= mask - 1)
21353 {
21354 regno = ctz_hwi (mask);
21355 reg = gen_rtx_REG (SImode, regno);
21356
21357 if (i == 0)
21358 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
21359 else
21360 tmp = gen_rtx_USE (VOIDmode, reg);
21361
21362 par[i] = tmp;
21363 }
21364
21365 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
21366 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21367 tmp = gen_frame_mem (BLKmode, tmp);
21368 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
21369 par[0] = tmp;
21370
21371 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
21372 insn = emit_insn (tmp);
21373
21374 /* Always build the stack adjustment note for unwind info. */
21375 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
21376 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
21377 par[0] = tmp;
21378
21379 /* Build the parallel of the registers recorded as saved for unwind. */
21380 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
21381 {
21382 regno = ctz_hwi (real_regs);
21383 reg = gen_rtx_REG (SImode, regno);
21384
21385 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
21386 tmp = gen_frame_mem (SImode, tmp);
21387 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
21388 RTX_FRAME_RELATED_P (tmp) = 1;
21389 par[j + 1] = tmp;
21390 }
21391
21392 if (j == 0)
21393 tmp = par[0];
21394 else
21395 {
21396 RTX_FRAME_RELATED_P (par[0]) = 1;
21397 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
21398 }
21399
21400 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
21401
21402 return insn;
21403 }
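 /* Example (a sketch of the intended use): when a high register such as
    r8 has been copied into a low register (say r4) so that it can be
    pushed by a Thumb-1 PUSH, MASK contains r4 (the register actually
    stored) while REAL_REGS contains r8, and the REG_FRAME_RELATED_EXPR
    note built above records the save of r8 at that stack slot.  */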
21404
21405 /* Emit code to pop registers from the stack. F is the assembly file.
21406 MASK is the set of registers to pop. */
21407 static void
21408 thumb_pop (FILE *f, unsigned long mask)
21409 {
21410 int regno;
21411 int lo_mask = mask & 0xFF;
21412 int pushed_words = 0;
21413
21414 gcc_assert (mask);
21415
21416 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
21417 {
21418 /* Special case. Do not generate a POP PC statement here; do it in
21419 thumb_exit ().  */
21420 thumb_exit (f, -1);
21421 return;
21422 }
21423
21424 fprintf (f, "\tpop\t{");
21425
21426 /* Look at the low registers first. */
21427 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
21428 {
21429 if (lo_mask & 1)
21430 {
21431 asm_fprintf (f, "%r", regno);
21432
21433 if ((lo_mask & ~1) != 0)
21434 fprintf (f, ", ");
21435
21436 pushed_words++;
21437 }
21438 }
21439
21440 if (mask & (1 << PC_REGNUM))
21441 {
21442 /* Catch popping the PC. */
21443 if (TARGET_INTERWORK || TARGET_BACKTRACE
21444 || crtl->calls_eh_return)
21445 {
21446 /* The PC is never popped directly; instead
21447 it is popped into r3 and then BX is used.  */
21448 fprintf (f, "}\n");
21449
21450 thumb_exit (f, -1);
21451
21452 return;
21453 }
21454 else
21455 {
21456 if (mask & 0xFF)
21457 fprintf (f, ", ");
21458
21459 asm_fprintf (f, "%r", PC_REGNUM);
21460 }
21461 }
21462
21463 fprintf (f, "}\n");
21464 }
21465
21466 /* Generate code to return from a thumb function.
21467 If 'reg_containing_return_addr' is -1, then the return address is
21468 actually on the stack, at the stack pointer. */
21469 static void
21470 thumb_exit (FILE *f, int reg_containing_return_addr)
21471 {
21472 unsigned regs_available_for_popping;
21473 unsigned regs_to_pop;
21474 int pops_needed;
21475 unsigned available;
21476 unsigned required;
21477 int mode;
21478 int size;
21479 int restore_a4 = FALSE;
21480
21481 /* Compute the registers we need to pop. */
21482 regs_to_pop = 0;
21483 pops_needed = 0;
21484
21485 if (reg_containing_return_addr == -1)
21486 {
21487 regs_to_pop |= 1 << LR_REGNUM;
21488 ++pops_needed;
21489 }
21490
21491 if (TARGET_BACKTRACE)
21492 {
21493 /* Restore the (ARM) frame pointer and stack pointer. */
21494 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
21495 pops_needed += 2;
21496 }
21497
21498 /* If there is nothing to pop then just emit the BX instruction and
21499 return. */
21500 if (pops_needed == 0)
21501 {
21502 if (crtl->calls_eh_return)
21503 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21504
21505 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21506 return;
21507 }
21508 /* Otherwise, if we are not supporting interworking, have not created
21509 a backtrace structure, and the function was not entered in ARM mode,
21510 just pop the return address straight into the PC.  */
21511 else if (!TARGET_INTERWORK
21512 && !TARGET_BACKTRACE
21513 && !is_called_in_ARM_mode (current_function_decl)
21514 && !crtl->calls_eh_return)
21515 {
21516 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
21517 return;
21518 }
21519
21520 /* Find out how many of the (return) argument registers we can corrupt. */
21521 regs_available_for_popping = 0;
21522
21523 /* If returning via __builtin_eh_return, the bottom three registers
21524 all contain information needed for the return. */
21525 if (crtl->calls_eh_return)
21526 size = 12;
21527 else
21528 {
21529 /* Deduce the registers used from the function's return value.
21530 This is more reliable than examining
21531 df_regs_ever_live_p () because that will be set if the register is
21532 ever used in the function, not just if the register is used
21533 to hold a return value.  */
21534
21535 if (crtl->return_rtx != 0)
21536 mode = GET_MODE (crtl->return_rtx);
21537 else
21538 mode = DECL_MODE (DECL_RESULT (current_function_decl));
21539
21540 size = GET_MODE_SIZE (mode);
21541
21542 if (size == 0)
21543 {
21544 /* In a void function we can use any argument register.
21545 In a function that returns a structure on the stack
21546 we can use the second and third argument registers. */
21547 if (mode == VOIDmode)
21548 regs_available_for_popping =
21549 (1 << ARG_REGISTER (1))
21550 | (1 << ARG_REGISTER (2))
21551 | (1 << ARG_REGISTER (3));
21552 else
21553 regs_available_for_popping =
21554 (1 << ARG_REGISTER (2))
21555 | (1 << ARG_REGISTER (3));
21556 }
21557 else if (size <= 4)
21558 regs_available_for_popping =
21559 (1 << ARG_REGISTER (2))
21560 | (1 << ARG_REGISTER (3));
21561 else if (size <= 8)
21562 regs_available_for_popping =
21563 (1 << ARG_REGISTER (3));
21564 }
21565
21566 /* Match registers to be popped with registers into which we pop them. */
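/* Note that X &= ~(X & -X) clears the least-significant set bit of X, so
   each loop iteration below pairs off one required register with one
   available register.  */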
21567 for (available = regs_available_for_popping,
21568 required = regs_to_pop;
21569 required != 0 && available != 0;
21570 available &= ~(available & - available),
21571 required &= ~(required & - required))
21572 -- pops_needed;
21573
21574 /* If we have any popping registers left over, remove them. */
21575 if (available > 0)
21576 regs_available_for_popping &= ~available;
21577
21578 /* Otherwise if we need another popping register we can use
21579 the fourth argument register. */
21580 else if (pops_needed)
21581 {
21582 /* If we have not found any free argument registers and
21583 reg a4 contains the return address, we must move it. */
21584 if (regs_available_for_popping == 0
21585 && reg_containing_return_addr == LAST_ARG_REGNUM)
21586 {
21587 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21588 reg_containing_return_addr = LR_REGNUM;
21589 }
21590 else if (size > 12)
21591 {
21592 /* Register a4 is being used to hold part of the return value,
21593 but we have dire need of a free, low register. */
21594 restore_a4 = TRUE;
21595
21596 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
21597 }
21598
21599 if (reg_containing_return_addr != LAST_ARG_REGNUM)
21600 {
21601 /* The fourth argument register is available. */
21602 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
21603
21604 --pops_needed;
21605 }
21606 }
21607
21608 /* Pop as many registers as we can. */
21609 thumb_pop (f, regs_available_for_popping);
21610
21611 /* Process the registers we popped. */
21612 if (reg_containing_return_addr == -1)
21613 {
21614 /* The return address was popped into the lowest numbered register. */
21615 regs_to_pop &= ~(1 << LR_REGNUM);
21616
21617 reg_containing_return_addr =
21618 number_of_first_bit_set (regs_available_for_popping);
21619
21620 /* Remove this register from the mask of available registers, so that
21621 the return address will not be corrupted by further pops. */
21622 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
21623 }
21624
21625 /* If we popped other registers then handle them here. */
21626 if (regs_available_for_popping)
21627 {
21628 int frame_pointer;
21629
21630 /* Work out which register currently contains the frame pointer. */
21631 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
21632
21633 /* Move it into the correct place. */
21634 asm_fprintf (f, "\tmov\t%r, %r\n",
21635 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
21636
21637 /* (Temporarily) remove it from the mask of popped registers. */
21638 regs_available_for_popping &= ~(1 << frame_pointer);
21639 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
21640
21641 if (regs_available_for_popping)
21642 {
21643 int stack_pointer;
21644
21645 /* We popped the stack pointer as well,
21646 find the register that contains it. */
21647 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
21648
21649 /* Move it into the stack register. */
21650 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
21651
21652 /* At this point we have popped all necessary registers, so
21653 do not worry about restoring regs_available_for_popping
21654 to its correct value:
21655
21656 assert (pops_needed == 0)
21657 assert (regs_available_for_popping == (1 << frame_pointer))
21658 assert (regs_to_pop == (1 << STACK_POINTER)) */
21659 }
21660 else
21661 {
21662 /* Since we have just moved the popped value into the frame
21663 pointer, the popping register is available for reuse, and
21664 we know that we still have the stack pointer left to pop. */
21665 regs_available_for_popping |= (1 << frame_pointer);
21666 }
21667 }
21668
21669 /* If we still have registers left on the stack, but we no longer have
21670 any registers into which we can pop them, then we must move the return
21671 address into the link register and make available the register that
21672 contained it. */
21673 if (regs_available_for_popping == 0 && pops_needed > 0)
21674 {
21675 regs_available_for_popping |= 1 << reg_containing_return_addr;
21676
21677 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
21678 reg_containing_return_addr);
21679
21680 reg_containing_return_addr = LR_REGNUM;
21681 }
21682
21683 /* If we have registers left on the stack then pop some more.
21684 We know that at most we will want to pop FP and SP. */
21685 if (pops_needed > 0)
21686 {
21687 int popped_into;
21688 int move_to;
21689
21690 thumb_pop (f, regs_available_for_popping);
21691
21692 /* We have popped either FP or SP.
21693 Move whichever one it is into the correct register. */
21694 popped_into = number_of_first_bit_set (regs_available_for_popping);
21695 move_to = number_of_first_bit_set (regs_to_pop);
21696
21697 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
21698
21699 regs_to_pop &= ~(1 << move_to);
21700
21701 --pops_needed;
21702 }
21703
21704 /* If we still have not popped everything then we must have only
21705 had one register available to us and we are now popping the SP. */
21706 if (pops_needed > 0)
21707 {
21708 int popped_into;
21709
21710 thumb_pop (f, regs_available_for_popping);
21711
21712 popped_into = number_of_first_bit_set (regs_available_for_popping);
21713
21714 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
21715 /*
21716 assert (regs_to_pop == (1 << STACK_POINTER))
21717 assert (pops_needed == 1)
21718 */
21719 }
21720
21721 /* If necessary restore the a4 register. */
21722 if (restore_a4)
21723 {
21724 if (reg_containing_return_addr != LR_REGNUM)
21725 {
21726 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21727 reg_containing_return_addr = LR_REGNUM;
21728 }
21729
21730 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
21731 }
21732
21733 if (crtl->calls_eh_return)
21734 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21735
21736 /* Return to caller. */
21737 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21738 }
21739 \f
21740 /* Scan INSN just before assembler is output for it.
21741 For Thumb-1, we track the status of the condition codes; this
21742 information is used in the cbranchsi4_insn pattern. */
21743 void
21744 thumb1_final_prescan_insn (rtx insn)
21745 {
21746 if (flag_print_asm_name)
21747 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
21748 INSN_ADDRESSES (INSN_UID (insn)));
21749 /* Don't overwrite the previous setter when we get to a cbranch. */
21750 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
21751 {
21752 enum attr_conds conds;
21753
21754 if (cfun->machine->thumb1_cc_insn)
21755 {
21756 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
21757 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
21758 CC_STATUS_INIT;
21759 }
21760 conds = get_attr_conds (insn);
21761 if (conds == CONDS_SET)
21762 {
21763 rtx set = single_set (insn);
21764 cfun->machine->thumb1_cc_insn = insn;
21765 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
21766 cfun->machine->thumb1_cc_op1 = const0_rtx;
21767 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
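/* A SUBS whose second operand is zero sets the flags exactly as a compare
   of the result against zero would, so the full CCmode is usable;
   otherwise only the N and Z flags are meaningful for later branches.  */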
21768 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
21769 {
21770 rtx src1 = XEXP (SET_SRC (set), 1);
21771 if (src1 == const0_rtx)
21772 cfun->machine->thumb1_cc_mode = CCmode;
21773 }
21774 }
21775 else if (conds != CONDS_NOCOND)
21776 cfun->machine->thumb1_cc_insn = NULL_RTX;
21777 }
21778 }
21779
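/* Return nonzero if VAL, truncated to 32 bits, can be represented as an
   8-bit constant shifted left by at most 24 bits.  */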
21780 int
21781 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
21782 {
21783 unsigned HOST_WIDE_INT mask = 0xff;
21784 int i;
21785
21786 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
21787 if (val == 0) /* XXX */
21788 return 0;
21789
21790 for (i = 0; i < 25; i++)
21791 if ((val & (mask << i)) == val)
21792 return 1;
21793
21794 return 0;
21795 }
21796
21797 /* Returns nonzero if the current function contains,
21798 or might contain, a far jump.  */
21799 static int
21800 thumb_far_jump_used_p (void)
21801 {
21802 rtx insn;
21803
21804 /* This test is only important for leaf functions. */
21805 /* assert (!leaf_function_p ()); */
21806
21807 /* If we have already decided that far jumps may be used,
21808 do not bother checking again, and always return true even if
21809 it turns out that they are not being used. Once we have made
21810 the decision that far jumps are present (and that hence the link
21811 register will be pushed onto the stack) we cannot go back on it. */
21812 if (cfun->machine->far_jump_used)
21813 return 1;
21814
21815 /* If this function is not being called from the prologue/epilogue
21816 generation code then it must be being called from the
21817 INITIAL_ELIMINATION_OFFSET macro. */
21818 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
21819 {
21820 /* In this case we know that we are being asked about the elimination
21821 of the arg pointer register. If that register is not being used,
21822 then there are no arguments on the stack, and we do not have to
21823 worry that a far jump might force the prologue to push the link
21824 register, changing the stack offsets. In this case we can just
21825 return false, since the presence of far jumps in the function will
21826 not affect stack offsets.
21827
21828 If the arg pointer is live (or if it was live, but has now been
21829 eliminated and so set to dead) then we do have to test to see if
21830 the function might contain a far jump. This test can lead to some
21831 false positives, since before reload is completed, the length of
21832 branch instructions is not known, so gcc defaults to returning their
21833 longest length, which in turn sets the far jump attribute to true.
21834
21835 A false positive will not result in bad code being generated, but it
21836 will result in a needless push and pop of the link register. We
21837 hope that this does not occur too often.
21838
21839 If we need doubleword stack alignment this could affect the other
21840 elimination offsets so we can't risk getting it wrong. */
21841 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
21842 cfun->machine->arg_pointer_live = 1;
21843 else if (!cfun->machine->arg_pointer_live)
21844 return 0;
21845 }
21846
21847 /* Check to see if the function contains a branch
21848 insn with the far jump attribute set. */
21849 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21850 {
21851 if (JUMP_P (insn)
21852 /* Ignore tablejump patterns. */
21853 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21854 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
21855 && get_attr_far_jump (insn) == FAR_JUMP_YES
21856 )
21857 {
21858 /* Record the fact that we have decided that
21859 the function does use far jumps. */
21860 cfun->machine->far_jump_used = 1;
21861 return 1;
21862 }
21863 }
21864
21865 return 0;
21866 }
21867
21868 /* Return nonzero if FUNC must be entered in ARM mode. */
21869 int
21870 is_called_in_ARM_mode (tree func)
21871 {
21872 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
21873
21874 /* Ignore the problem about functions whose address is taken. */
21875 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
21876 return TRUE;
21877
21878 #ifdef ARM_PE
21879 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
21880 #else
21881 return FALSE;
21882 #endif
21883 }
21884
21885 /* Given the stack offsets and register mask in OFFSETS, decide how
21886 many additional registers to push instead of subtracting a constant
21887 from SP. For epilogues the principle is the same except we use pop.
21888 FOR_PROLOGUE indicates which we're generating. */
21889 static int
21890 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
21891 {
21892 HOST_WIDE_INT amount;
21893 unsigned long live_regs_mask = offsets->saved_regs_mask;
21894 /* Extract a mask of the ones we can give to the Thumb's push/pop
21895 instruction. */
21896 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
21897 /* Then count how many other high registers will need to be pushed. */
21898 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21899 int n_free, reg_base, size;
21900
21901 if (!for_prologue && frame_pointer_needed)
21902 amount = offsets->locals_base - offsets->saved_regs;
21903 else
21904 amount = offsets->outgoing_args - offsets->saved_regs;
21905
21906 /* If the stack frame size is 512 exactly, we can save one load
21907 instruction, which should make this a win even when optimizing
21908 for speed. */
21909 if (!optimize_size && amount != 512)
21910 return 0;
21911
21912 /* Can't do this if there are high registers to push. */
21913 if (high_regs_pushed != 0)
21914 return 0;
21915
21916 /* Shouldn't do it in the prologue if no registers would normally
21917 be pushed at all. In the epilogue, also allow it if we'll have
21918 a pop insn for the PC. */
21919 if (l_mask == 0
21920 && (for_prologue
21921 || TARGET_BACKTRACE
21922 || (live_regs_mask & 1 << LR_REGNUM) == 0
21923 || TARGET_INTERWORK
21924 || crtl->args.pretend_args_size != 0))
21925 return 0;
21926
21927 /* Don't do this if thumb_expand_prologue wants to emit instructions
21928 between the push and the stack frame allocation. */
21929 if (for_prologue
21930 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
21931 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
21932 return 0;
21933
21934 reg_base = 0;
21935 n_free = 0;
21936 if (!for_prologue)
21937 {
21938 size = arm_size_return_regs ();
21939 reg_base = ARM_NUM_INTS (size);
21940 live_regs_mask >>= reg_base;
21941 }
21942
21943 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
21944 && (for_prologue || call_used_regs[reg_base + n_free]))
21945 {
21946 live_regs_mask >>= 1;
21947 n_free++;
21948 }
21949
21950 if (n_free == 0)
21951 return 0;
21952 gcc_assert (amount / 4 * 4 == amount);
21953
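/* Either push enough extra registers so that the remaining adjustment
   fits in a single SP-adjusting instruction (less than 512 bytes), or,
   if the frame is small enough, enough registers to replace the
   adjustment entirely.  */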
21954 if (amount >= 512 && (amount - n_free * 4) < 512)
21955 return (amount - 508) / 4;
21956 if (amount <= n_free * 4)
21957 return amount / 4;
21958 return 0;
21959 }
21960
21961 /* The bits which aren't usefully expanded as rtl. */
21962 const char *
21963 thumb1_unexpanded_epilogue (void)
21964 {
21965 arm_stack_offsets *offsets;
21966 int regno;
21967 unsigned long live_regs_mask = 0;
21968 int high_regs_pushed = 0;
21969 int extra_pop;
21970 int had_to_push_lr;
21971 int size;
21972
21973 if (cfun->machine->return_used_this_function != 0)
21974 return "";
21975
21976 if (IS_NAKED (arm_current_func_type ()))
21977 return "";
21978
21979 offsets = arm_get_frame_offsets ();
21980 live_regs_mask = offsets->saved_regs_mask;
21981 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21982
21983 /* We can deduce the registers used from the function's return value.
21984 This is more reliable than examining df_regs_ever_live_p () because that
21985 will be set if the register is ever used in the function, not just if
21986 the register is used to hold a return value. */
21987 size = arm_size_return_regs ();
21988
21989 extra_pop = thumb1_extra_regs_pushed (offsets, false);
21990 if (extra_pop > 0)
21991 {
21992 unsigned long extra_mask = (1 << extra_pop) - 1;
21993 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
21994 }
21995
21996 /* The prolog may have pushed some high registers to use as
21997 work registers, e.g. the testsuite file:
21998 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
21999 compiles to produce:
22000 push {r4, r5, r6, r7, lr}
22001 mov r7, r9
22002 mov r6, r8
22003 push {r6, r7}
22004 as part of the prolog. We have to undo that pushing here. */
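/* The saved high-register values are popped into free low registers and
   then moved back into the high registers with MOV, since a Thumb-1 POP
   can only target r0-r7 and the PC.  */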
22005
22006 if (high_regs_pushed)
22007 {
22008 unsigned long mask = live_regs_mask & 0xff;
22009 int next_hi_reg;
22010
22011 /* The available low registers depend on the size of the value we are
22012 returning. */
22013 if (size <= 12)
22014 mask |= 1 << 3;
22015 if (size <= 8)
22016 mask |= 1 << 2;
22017
22018 if (mask == 0)
22019 /* Oh dear! We have no low registers into which we can pop
22020 high registers! */
22021 internal_error
22022 ("no low registers available for popping high registers");
22023
22024 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
22025 if (live_regs_mask & (1 << next_hi_reg))
22026 break;
22027
22028 while (high_regs_pushed)
22029 {
22030 /* Find lo register(s) into which the high register(s) can
22031 be popped. */
22032 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
22033 {
22034 if (mask & (1 << regno))
22035 high_regs_pushed--;
22036 if (high_regs_pushed == 0)
22037 break;
22038 }
22039
22040 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
22041
22042 /* Pop the values into the low register(s). */
22043 thumb_pop (asm_out_file, mask);
22044
22045 /* Move the value(s) into the high registers. */
22046 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
22047 {
22048 if (mask & (1 << regno))
22049 {
22050 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
22051 regno);
22052
22053 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
22054 if (live_regs_mask & (1 << next_hi_reg))
22055 break;
22056 }
22057 }
22058 }
22059 live_regs_mask &= ~0x0f00;
22060 }
22061
22062 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
22063 live_regs_mask &= 0xff;
22064
22065 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
22066 {
22067 /* Pop the return address into the PC. */
22068 if (had_to_push_lr)
22069 live_regs_mask |= 1 << PC_REGNUM;
22070
22071 /* Either no argument registers were pushed or a backtrace
22072 structure was created which includes an adjusted stack
22073 pointer, so just pop everything. */
22074 if (live_regs_mask)
22075 thumb_pop (asm_out_file, live_regs_mask);
22076
22077 /* We have either just popped the return address into the
22078 PC or it was kept in LR for the entire function.
22079 Note that thumb_pop has already called thumb_exit if the
22080 PC was in the list. */
22081 if (!had_to_push_lr)
22082 thumb_exit (asm_out_file, LR_REGNUM);
22083 }
22084 else
22085 {
22086 /* Pop everything but the return address. */
22087 if (live_regs_mask)
22088 thumb_pop (asm_out_file, live_regs_mask);
22089
22090 if (had_to_push_lr)
22091 {
22092 if (size > 12)
22093 {
22094 /* We have no free low regs, so save one. */
22095 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
22096 LAST_ARG_REGNUM);
22097 }
22098
22099 /* Get the return address into a temporary register. */
22100 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
22101
22102 if (size > 12)
22103 {
22104 /* Move the return address to lr. */
22105 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
22106 LAST_ARG_REGNUM);
22107 /* Restore the low register. */
22108 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
22109 IP_REGNUM);
22110 regno = LR_REGNUM;
22111 }
22112 else
22113 regno = LAST_ARG_REGNUM;
22114 }
22115 else
22116 regno = LR_REGNUM;
22117
22118 /* Remove the argument registers that were pushed onto the stack. */
22119 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
22120 SP_REGNUM, SP_REGNUM,
22121 crtl->args.pretend_args_size);
22122
22123 thumb_exit (asm_out_file, regno);
22124 }
22125
22126 return "";
22127 }
22128
22129 /* Functions to save and restore machine-specific function data. */
22130 static struct machine_function *
22131 arm_init_machine_status (void)
22132 {
22133 struct machine_function *machine;
22134 machine = ggc_alloc_cleared_machine_function ();
22135
22136 #if ARM_FT_UNKNOWN != 0
22137 machine->func_type = ARM_FT_UNKNOWN;
22138 #endif
22139 return machine;
22140 }
22141
22142 /* Return an RTX indicating where the return address to the
22143 calling function can be found. */
22144 rtx
22145 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
22146 {
22147 if (count != 0)
22148 return NULL_RTX;
22149
22150 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
22151 }
22152
22153 /* Do anything needed before RTL is emitted for each function. */
22154 void
22155 arm_init_expanders (void)
22156 {
22157 /* Arrange to initialize and mark the machine per-function status. */
22158 init_machine_status = arm_init_machine_status;
22159
22160 /* This is to stop the combine pass optimizing away the alignment
22161 adjustment of va_arg. */
22162 /* ??? It is claimed that this should not be necessary. */
22163 if (cfun)
22164 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
22165 }
22166
22167
22168 /* Like arm_compute_initial_elimination_offset.  Simpler because there
22169 isn't an ABI specified frame pointer for Thumb. Instead, we set it
22170 to point at the base of the local variables after static stack
22171 space for a function has been allocated. */
22172
22173 HOST_WIDE_INT
22174 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
22175 {
22176 arm_stack_offsets *offsets;
22177
22178 offsets = arm_get_frame_offsets ();
22179
22180 switch (from)
22181 {
22182 case ARG_POINTER_REGNUM:
22183 switch (to)
22184 {
22185 case STACK_POINTER_REGNUM:
22186 return offsets->outgoing_args - offsets->saved_args;
22187
22188 case FRAME_POINTER_REGNUM:
22189 return offsets->soft_frame - offsets->saved_args;
22190
22191 case ARM_HARD_FRAME_POINTER_REGNUM:
22192 return offsets->saved_regs - offsets->saved_args;
22193
22194 case THUMB_HARD_FRAME_POINTER_REGNUM:
22195 return offsets->locals_base - offsets->saved_args;
22196
22197 default:
22198 gcc_unreachable ();
22199 }
22200 break;
22201
22202 case FRAME_POINTER_REGNUM:
22203 switch (to)
22204 {
22205 case STACK_POINTER_REGNUM:
22206 return offsets->outgoing_args - offsets->soft_frame;
22207
22208 case ARM_HARD_FRAME_POINTER_REGNUM:
22209 return offsets->saved_regs - offsets->soft_frame;
22210
22211 case THUMB_HARD_FRAME_POINTER_REGNUM:
22212 return offsets->locals_base - offsets->soft_frame;
22213
22214 default:
22215 gcc_unreachable ();
22216 }
22217 break;
22218
22219 default:
22220 gcc_unreachable ();
22221 }
22222 }
22223
22224 /* Generate the function's prologue. */
22225
22226 void
22227 thumb1_expand_prologue (void)
22228 {
22229 rtx insn;
22230
22231 HOST_WIDE_INT amount;
22232 arm_stack_offsets *offsets;
22233 unsigned long func_type;
22234 int regno;
22235 unsigned long live_regs_mask;
22236 unsigned long l_mask;
22237 unsigned high_regs_pushed = 0;
22238
22239 func_type = arm_current_func_type ();
22240
22241 /* Naked functions don't have prologues. */
22242 if (IS_NAKED (func_type))
22243 return;
22244
22245 if (IS_INTERRUPT (func_type))
22246 {
22247 error ("interrupt Service Routines cannot be coded in Thumb mode");
22248 return;
22249 }
22250
22251 if (is_called_in_ARM_mode (current_function_decl))
22252 emit_insn (gen_prologue_thumb1_interwork ());
22253
22254 offsets = arm_get_frame_offsets ();
22255 live_regs_mask = offsets->saved_regs_mask;
22256
22257 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
22258 l_mask = live_regs_mask & 0x40ff;
22259 /* Then count how many other high registers will need to be pushed. */
22260 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
22261
22262 if (crtl->args.pretend_args_size)
22263 {
22264 rtx x = GEN_INT (-crtl->args.pretend_args_size);
22265
22266 if (cfun->machine->uses_anonymous_args)
22267 {
22268 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
22269 unsigned long mask;
22270
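/* Build a mask of the NUM_PUSHES highest argument registers; these hold
   the anonymous arguments that need to be flushed to the stack.  */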
22271 mask = 1ul << (LAST_ARG_REGNUM + 1);
22272 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
22273
22274 insn = thumb1_emit_multi_reg_push (mask, 0);
22275 }
22276 else
22277 {
22278 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22279 stack_pointer_rtx, x));
22280 }
22281 RTX_FRAME_RELATED_P (insn) = 1;
22282 }
22283
22284 if (TARGET_BACKTRACE)
22285 {
22286 HOST_WIDE_INT offset = 0;
22287 unsigned work_register;
22288 rtx work_reg, x, arm_hfp_rtx;
22289
22290 /* We have been asked to create a stack backtrace structure.
22291 The code looks like this:
22292
22293 0 .align 2
22294 0 func:
22295 0 sub SP, #16 Reserve space for 4 registers.
22296 2 push {R7} Push low registers.
22297 4 add R7, SP, #20 Get the stack pointer before the push.
22298 6 str R7, [SP, #8] Store the stack pointer
22299 (before reserving the space).
22300 8 mov R7, PC Get hold of the start of this code + 12.
22301 10 str R7, [SP, #16] Store it.
22302 12 mov R7, FP Get hold of the current frame pointer.
22303 14 str R7, [SP, #4] Store it.
22304 16 mov R7, LR Get hold of the current return address.
22305 18 str R7, [SP, #12] Store it.
22306 20 add R7, SP, #16 Point at the start of the
22307 backtrace structure.
22308 22 mov FP, R7 Put this value into the frame pointer. */
22309
22310 work_register = thumb_find_work_register (live_regs_mask);
22311 work_reg = gen_rtx_REG (SImode, work_register);
22312 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
22313
22314 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22315 stack_pointer_rtx, GEN_INT (-16)));
22316 RTX_FRAME_RELATED_P (insn) = 1;
22317
22318 if (l_mask)
22319 {
22320 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
22321 RTX_FRAME_RELATED_P (insn) = 1;
22322
22323 offset = bit_count (l_mask) * UNITS_PER_WORD;
22324 }
22325
22326 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
22327 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22328
22329 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
22330 x = gen_frame_mem (SImode, x);
22331 emit_move_insn (x, work_reg);
22332
22333 /* Make sure that the instruction fetching the PC is in the right place
22334 to calculate "start of backtrace creation code + 12". */
22335 /* ??? The stores using the common WORK_REG ought to be enough to
22336 prevent the scheduler from doing anything weird. Failing that
22337 we could always move all of the following into an UNSPEC_VOLATILE. */
22338 if (l_mask)
22339 {
22340 x = gen_rtx_REG (SImode, PC_REGNUM);
22341 emit_move_insn (work_reg, x);
22342
22343 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
22344 x = gen_frame_mem (SImode, x);
22345 emit_move_insn (x, work_reg);
22346
22347 emit_move_insn (work_reg, arm_hfp_rtx);
22348
22349 x = plus_constant (Pmode, stack_pointer_rtx, offset);
22350 x = gen_frame_mem (SImode, x);
22351 emit_move_insn (x, work_reg);
22352 }
22353 else
22354 {
22355 emit_move_insn (work_reg, arm_hfp_rtx);
22356
22357 x = plus_constant (Pmode, stack_pointer_rtx, offset);
22358 x = gen_frame_mem (SImode, x);
22359 emit_move_insn (x, work_reg);
22360
22361 x = gen_rtx_REG (SImode, PC_REGNUM);
22362 emit_move_insn (work_reg, x);
22363
22364 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
22365 x = gen_frame_mem (SImode, x);
22366 emit_move_insn (x, work_reg);
22367 }
22368
22369 x = gen_rtx_REG (SImode, LR_REGNUM);
22370 emit_move_insn (work_reg, x);
22371
22372 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
22373 x = gen_frame_mem (SImode, x);
22374 emit_move_insn (x, work_reg);
22375
22376 x = GEN_INT (offset + 12);
22377 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22378
22379 emit_move_insn (arm_hfp_rtx, work_reg);
22380 }
22381 /* Optimization: If we are not pushing any low registers but we are going
22382 to push some high registers then delay our first push. This will just
22383 be a push of LR and we can combine it with the push of the first high
22384 register. */
22385 else if ((l_mask & 0xff) != 0
22386 || (high_regs_pushed == 0 && l_mask))
22387 {
22388 unsigned long mask = l_mask;
22389 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
22390 insn = thumb1_emit_multi_reg_push (mask, mask);
22391 RTX_FRAME_RELATED_P (insn) = 1;
22392 }
22393
22394 if (high_regs_pushed)
22395 {
22396 unsigned pushable_regs;
22397 unsigned next_hi_reg;
22398
22399 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
22400 if (live_regs_mask & (1 << next_hi_reg))
22401 break;
22402
22403 pushable_regs = l_mask & 0xff;
22404
22405 if (pushable_regs == 0)
22406 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
22407
22408 while (high_regs_pushed > 0)
22409 {
22410 unsigned long real_regs_mask = 0;
22411
22412 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
22413 {
22414 if (pushable_regs & (1 << regno))
22415 {
22416 emit_move_insn (gen_rtx_REG (SImode, regno),
22417 gen_rtx_REG (SImode, next_hi_reg));
22418
22419 high_regs_pushed --;
22420 real_regs_mask |= (1 << next_hi_reg);
22421
22422 if (high_regs_pushed)
22423 {
22424 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
22425 next_hi_reg --)
22426 if (live_regs_mask & (1 << next_hi_reg))
22427 break;
22428 }
22429 else
22430 {
22431 pushable_regs &= ~((1 << regno) - 1);
22432 break;
22433 }
22434 }
22435 }
22436
22437 /* If we had to find a work register and we have not yet
22438 saved the LR then add it to the list of regs to push. */
22439 if (l_mask == (1 << LR_REGNUM))
22440 {
22441 pushable_regs |= l_mask;
22442 real_regs_mask |= l_mask;
22443 l_mask = 0;
22444 }
22445
22446 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
22447 RTX_FRAME_RELATED_P (insn) = 1;
22448 }
22449 }
22450
22451 /* Load the pic register before setting the frame pointer,
22452 so we can use r7 as a temporary work register. */
22453 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22454 arm_load_pic_register (live_regs_mask);
22455
22456 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
22457 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
22458 stack_pointer_rtx);
22459
22460 if (flag_stack_usage_info)
22461 current_function_static_stack_size
22462 = offsets->outgoing_args - offsets->saved_args;
22463
22464 amount = offsets->outgoing_args - offsets->saved_regs;
22465 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
22466 if (amount)
22467 {
22468 if (amount < 512)
22469 {
22470 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22471 GEN_INT (- amount)));
22472 RTX_FRAME_RELATED_P (insn) = 1;
22473 }
22474 else
22475 {
22476 rtx reg, dwarf;
22477
22478 /* The stack decrement is too big for an immediate value in a single
22479 insn. In theory we could issue multiple subtracts, but after
22480 three of them it becomes more space efficient to place the full
22481 value in the constant pool and load into a register. (Also the
22482 ARM debugger really likes to see only one stack decrement per
22483 function). So instead we look for a scratch register into which
22484 we can load the decrement, and then we subtract this from the
22485 stack pointer. Unfortunately on the thumb the only available
22486 scratch registers are the argument registers, and we cannot use
22487 these as they may hold arguments to the function. Instead we
22488 attempt to locate a call preserved register which is used by this
22489 function. If we can find one, then we know that it will have
22490 been pushed at the start of the prologue and so we can corrupt
22491 it now. */
22492 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
22493 if (live_regs_mask & (1 << regno))
22494 break;
22495
22496 gcc_assert (regno <= LAST_LO_REGNUM);
22497
22498 reg = gen_rtx_REG (SImode, regno);
22499
22500 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
22501
22502 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22503 stack_pointer_rtx, reg));
22504
22505 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
22506 plus_constant (Pmode, stack_pointer_rtx,
22507 -amount));
22508 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22509 RTX_FRAME_RELATED_P (insn) = 1;
22510 }
22511 }
22512
22513 if (frame_pointer_needed)
22514 thumb_set_frame_pointer (offsets);
22515
22516 /* If we are profiling, make sure no instructions are scheduled before
22517 the call to mcount. Similarly if the user has requested no
22518 scheduling in the prolog. Similarly if we want non-call exceptions
22519 using the EABI unwinder, to prevent faulting instructions from being
22520 swapped with a stack adjustment. */
22521 if (crtl->profile || !TARGET_SCHED_PROLOG
22522 || (arm_except_unwind_info (&global_options) == UI_TARGET
22523 && cfun->can_throw_non_call_exceptions))
22524 emit_insn (gen_blockage ());
22525
22526 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
22527 if (live_regs_mask & 0xff)
22528 cfun->machine->lr_save_eliminated = 0;
22529 }
22530
22531 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
22532 single POP instruction can be generated.  LR should be replaced by PC.  All
22533 the checks required are already done by USE_RETURN_INSN ().  Hence, all
22534 we really need to check here is whether a single register or multiple
22535 registers are to be popped.  */
22536 void
22537 thumb2_expand_return (void)
22538 {
22539 int i, num_regs;
22540 unsigned long saved_regs_mask;
22541 arm_stack_offsets *offsets;
22542
22543 offsets = arm_get_frame_offsets ();
22544 saved_regs_mask = offsets->saved_regs_mask;
22545
22546 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
22547 if (saved_regs_mask & (1 << i))
22548 num_regs++;
22549
22550 if (saved_regs_mask)
22551 {
22552 if (num_regs == 1)
22553 {
22554 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22555 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
22556 rtx addr = gen_rtx_MEM (SImode,
22557 gen_rtx_POST_INC (SImode,
22558 stack_pointer_rtx));
22559 set_mem_alias_set (addr, get_frame_alias_set ());
22560 XVECEXP (par, 0, 0) = ret_rtx;
22561 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
22562 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
22563 emit_jump_insn (par);
22564 }
22565 else
22566 {
22567 saved_regs_mask &= ~ (1 << LR_REGNUM);
22568 saved_regs_mask |= (1 << PC_REGNUM);
22569 arm_emit_multi_reg_pop (saved_regs_mask);
22570 }
22571 }
22572 else
22573 {
22574 emit_jump_insn (simple_return_rtx);
22575 }
22576 }
22577
22578 void
22579 thumb1_expand_epilogue (void)
22580 {
22581 HOST_WIDE_INT amount;
22582 arm_stack_offsets *offsets;
22583 int regno;
22584
22585 /* Naked functions don't have epilogues.  */
22586 if (IS_NAKED (arm_current_func_type ()))
22587 return;
22588
22589 offsets = arm_get_frame_offsets ();
22590 amount = offsets->outgoing_args - offsets->saved_regs;
22591
22592 if (frame_pointer_needed)
22593 {
22594 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
22595 amount = offsets->locals_base - offsets->saved_regs;
22596 }
22597 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
22598
22599 gcc_assert (amount >= 0);
22600 if (amount)
22601 {
22602 emit_insn (gen_blockage ());
22603
22604 if (amount < 512)
22605 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22606 GEN_INT (amount)));
22607 else
22608 {
22609 /* r3 is always free in the epilogue. */
22610 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
22611
22612 emit_insn (gen_movsi (reg, GEN_INT (amount)));
22613 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
22614 }
22615 }
22616
22617 /* Emit a USE (stack_pointer_rtx), so that
22618 the stack adjustment will not be deleted. */
22619 emit_insn (gen_prologue_use (stack_pointer_rtx));
22620
22621 if (crtl->profile || !TARGET_SCHED_PROLOG)
22622 emit_insn (gen_blockage ());
22623
22624 /* Emit a clobber for each register that will be restored in the epilogue,
22625 so that flow2 will get register lifetimes correct. */
22626 for (regno = 0; regno < 13; regno++)
22627 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
22628 emit_clobber (gen_rtx_REG (SImode, regno));
22629
22630 if (! df_regs_ever_live_p (LR_REGNUM))
22631 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
22632 }
22633
22634 /* Epilogue code for APCS frame. */
22635 static void
22636 arm_expand_epilogue_apcs_frame (bool really_return)
22637 {
22638 unsigned long func_type;
22639 unsigned long saved_regs_mask;
22640 int num_regs = 0;
22641 int i;
22642 int floats_from_frame = 0;
22643 arm_stack_offsets *offsets;
22644
22645 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
22646 func_type = arm_current_func_type ();
22647
22648 /* Get frame offsets for ARM. */
22649 offsets = arm_get_frame_offsets ();
22650 saved_regs_mask = offsets->saved_regs_mask;
22651
22652 /* Find the offset of the floating-point save area in the frame. */
22653 floats_from_frame = offsets->saved_args - offsets->frame;
22654
22655 /* Compute how many core registers are saved and how far away the floats are.  */
22656 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22657 if (saved_regs_mask & (1 << i))
22658 {
22659 num_regs++;
22660 floats_from_frame += 4;
22661 }
22662
22663 if (TARGET_HARD_FLOAT && TARGET_VFP)
22664 {
22665 int start_reg;
22666
22667 /* The offset is from IP_REGNUM. */
22668 int saved_size = arm_get_vfp_saved_size ();
22669 if (saved_size > 0)
22670 {
22671 floats_from_frame += saved_size;
22672 emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM),
22673 hard_frame_pointer_rtx,
22674 GEN_INT (-floats_from_frame)));
22675 }
22676
22677 /* Generate VFP register multi-pop. */
22678 start_reg = FIRST_VFP_REGNUM;
22679
22680 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
22681 /* Look for a case where a reg does not need restoring. */
22682 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
22683 && (!df_regs_ever_live_p (i + 1)
22684 || call_used_regs[i + 1]))
22685 {
22686 if (start_reg != i)
22687 arm_emit_vfp_multi_reg_pop (start_reg,
22688 (i - start_reg) / 2,
22689 gen_rtx_REG (SImode,
22690 IP_REGNUM));
22691 start_reg = i + 2;
22692 }
22693
22694 /* Restore the remaining regs that we have discovered (or possibly
22695 even all of them, if the conditional in the for loop never
22696 fired). */
22697 if (start_reg != i)
22698 arm_emit_vfp_multi_reg_pop (start_reg,
22699 (i - start_reg) / 2,
22700 gen_rtx_REG (SImode, IP_REGNUM));
22701 }
22702
22703 if (TARGET_IWMMXT)
22704 {
22705 /* The frame pointer is guaranteed to be non-double-word aligned, as
22706 it is set to double-word-aligned old_stack_pointer - 4. */
22707 rtx insn;
22708 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
22709
22710 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
22711 if (df_regs_ever_live_p (i) && !call_used_regs[i])
22712 {
22713 rtx addr = gen_frame_mem (V2SImode,
22714 plus_constant (Pmode, hard_frame_pointer_rtx,
22715 - lrm_count * 4));
22716 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
22717 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22718 gen_rtx_REG (V2SImode, i),
22719 NULL_RTX);
22720 lrm_count += 2;
22721 }
22722 }
22723
22724 /* saved_regs_mask should contain IP, which holds the old stack pointer
22725 at the time the activation record was created.  Since SP and IP are adjacent registers,
22726 we can restore the value directly into SP. */
22727 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
22728 saved_regs_mask &= ~(1 << IP_REGNUM);
22729 saved_regs_mask |= (1 << SP_REGNUM);
22730
22731 /* There are two registers left in saved_regs_mask - LR and PC. We
22732 only need to restore LR (the return address), but to
22733 save time we can load it directly into PC, unless we need a
22734 special function exit sequence, or we are not really returning. */
22735 if (really_return
22736 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
22737 && !crtl->calls_eh_return)
22738 /* Delete LR from the register mask, so that LR on
22739 the stack is loaded into the PC in the register mask. */
22740 saved_regs_mask &= ~(1 << LR_REGNUM);
22741 else
22742 saved_regs_mask &= ~(1 << PC_REGNUM);
22743
22744 num_regs = bit_count (saved_regs_mask);
22745 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
22746 {
22747 /* Unwind the stack to just below the saved registers. */
22748 emit_insn (gen_addsi3 (stack_pointer_rtx,
22749 hard_frame_pointer_rtx,
22750 GEN_INT (- 4 * num_regs)));
22751 }
22752
22753 arm_emit_multi_reg_pop (saved_regs_mask);
22754
22755 if (IS_INTERRUPT (func_type))
22756 {
22757 /* Interrupt handlers will have pushed the
22758 IP onto the stack, so restore it now. */
22759 rtx insn;
22760 rtx addr = gen_rtx_MEM (SImode,
22761 gen_rtx_POST_INC (SImode,
22762 stack_pointer_rtx));
22763 set_mem_alias_set (addr, get_frame_alias_set ());
22764 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
22765 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22766 gen_rtx_REG (SImode, IP_REGNUM),
22767 NULL_RTX);
22768 }
22769
22770 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
22771 return;
22772
22773 if (crtl->calls_eh_return)
22774 emit_insn (gen_addsi3 (stack_pointer_rtx,
22775 stack_pointer_rtx,
22776 GEN_INT (ARM_EH_STACKADJ_REGNUM)));
22777
22778 if (IS_STACKALIGN (func_type))
22779 /* Restore the original stack pointer. Before prologue, the stack was
22780 realigned and the original stack pointer saved in r0. For details,
22781 see comment in arm_expand_prologue. */
22782 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
22783
22784 emit_jump_insn (simple_return_rtx);
22785 }
22786
22787 /* Generate RTL to represent an ARM epilogue.  REALLY_RETURN is true if the
22788 function is not a sibcall.  */
22789 void
22790 arm_expand_epilogue (bool really_return)
22791 {
22792 unsigned long func_type;
22793 unsigned long saved_regs_mask;
22794 int num_regs = 0;
22795 int i;
22796 int amount;
22797 arm_stack_offsets *offsets;
22798
22799 func_type = arm_current_func_type ();
22800
22801 /* Naked functions don't have epilogues.  Hence, generate the return pattern and
22802 let output_return_instruction take care of any instruction emission.  */
22803 if (IS_NAKED (func_type)
22804 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
22805 {
22806 emit_jump_insn (simple_return_rtx);
22807 return;
22808 }
22809
22810 /* If we are throwing an exception, then we really must be doing a
22811 return, so we can't tail-call. */
22812 gcc_assert (!crtl->calls_eh_return || really_return);
22813
22814 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
22815 {
22816 arm_expand_epilogue_apcs_frame (really_return);
22817 return;
22818 }
22819
22820 /* Get frame offsets for ARM. */
22821 offsets = arm_get_frame_offsets ();
22822 saved_regs_mask = offsets->saved_regs_mask;
22823 num_regs = bit_count (saved_regs_mask);
22824
22825 if (frame_pointer_needed)
22826 {
22827 /* Restore stack pointer if necessary. */
22828 if (TARGET_ARM)
22829 {
22830 /* In ARM mode, the frame pointer points to the first saved register.
22831 Restore the stack pointer to the last saved register.  */
22832 amount = offsets->frame - offsets->saved_regs;
22833
22834 /* Force out any pending memory operations that reference stacked data
22835 before stack de-allocation occurs. */
22836 emit_insn (gen_blockage ());
22837 emit_insn (gen_addsi3 (stack_pointer_rtx,
22838 hard_frame_pointer_rtx,
22839 GEN_INT (amount)));
22840
22841 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
22842 deleted. */
22843 emit_insn (gen_prologue_use (stack_pointer_rtx));
22844 }
22845 else
22846 {
22847 /* In Thumb-2 mode, the frame pointer points to the last saved
22848 register. */
22849 amount = offsets->locals_base - offsets->saved_regs;
22850 if (amount)
22851 emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22852 hard_frame_pointer_rtx,
22853 GEN_INT (amount)));
22854
22855 /* Force out any pending memory operations that reference stacked data
22856 before stack de-allocation occurs. */
22857 emit_insn (gen_blockage ());
22858 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
22859 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
22860 deleted. */
22861 emit_insn (gen_prologue_use (stack_pointer_rtx));
22862 }
22863 }
22864 else
22865 {
22866 /* Pop off outgoing args and local frame to adjust stack pointer to
22867 last saved register. */
22868 amount = offsets->outgoing_args - offsets->saved_regs;
22869 if (amount)
22870 {
22871 /* Force out any pending memory operations that reference stacked data
22872 before stack de-allocation occurs. */
22873 emit_insn (gen_blockage ());
22874 emit_insn (gen_addsi3 (stack_pointer_rtx,
22875 stack_pointer_rtx,
22876 GEN_INT (amount)));
22877 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
22878 not deleted. */
22879 emit_insn (gen_prologue_use (stack_pointer_rtx));
22880 }
22881 }
22882
22883 if (TARGET_HARD_FLOAT && TARGET_VFP)
22884 {
22885 /* Generate VFP register multi-pop. */
22886 int end_reg = LAST_VFP_REGNUM + 1;
22887
22888 /* Scan the registers in reverse order. We need to match
22889 any groupings made in the prologue and generate matching
22890 vldm operations. The need to match groups is because,
22891 unlike pop, vldm can only do consecutive regs. */
22892 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
22893 /* Look for a case where a reg does not need restoring. */
22894 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
22895 && (!df_regs_ever_live_p (i + 1)
22896 || call_used_regs[i + 1]))
22897 {
22898 /* Restore the regs discovered so far (from reg+2 to
22899 end_reg). */
22900 if (end_reg > i + 2)
22901 arm_emit_vfp_multi_reg_pop (i + 2,
22902 (end_reg - (i + 2)) / 2,
22903 stack_pointer_rtx);
22904 end_reg = i;
22905 }
22906
22907 /* Restore the remaining regs that we have discovered (or possibly
22908 even all of them, if the conditional in the for loop never
22909 fired). */
22910 if (end_reg > i + 2)
22911 arm_emit_vfp_multi_reg_pop (i + 2,
22912 (end_reg - (i + 2)) / 2,
22913 stack_pointer_rtx);
22914 }
22915
22916 if (TARGET_IWMMXT)
22917 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
22918 if (df_regs_ever_live_p (i) && !call_used_regs[i])
22919 {
22920 rtx insn;
22921 rtx addr = gen_rtx_MEM (V2SImode,
22922 gen_rtx_POST_INC (SImode,
22923 stack_pointer_rtx));
22924 set_mem_alias_set (addr, get_frame_alias_set ());
22925 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
22926 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22927 gen_rtx_REG (V2SImode, i),
22928 NULL_RTX);
22929 }
22930
22931 if (saved_regs_mask)
22932 {
22933 rtx insn;
22934 bool return_in_pc = false;
22935
22936 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
22937 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
22938 && !IS_STACKALIGN (func_type)
22939 && really_return
22940 && crtl->args.pretend_args_size == 0
22941 && saved_regs_mask & (1 << LR_REGNUM)
22942 && !crtl->calls_eh_return)
22943 {
22944 saved_regs_mask &= ~(1 << LR_REGNUM);
22945 saved_regs_mask |= (1 << PC_REGNUM);
22946 return_in_pc = true;
22947 }
22948
22949 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
22950 {
22951 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22952 if (saved_regs_mask & (1 << i))
22953 {
22954 rtx addr = gen_rtx_MEM (SImode,
22955 gen_rtx_POST_INC (SImode,
22956 stack_pointer_rtx));
22957 set_mem_alias_set (addr, get_frame_alias_set ());
22958
22959 if (i == PC_REGNUM)
22960 {
22961 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22962 XVECEXP (insn, 0, 0) = ret_rtx;
22963 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
22964 gen_rtx_REG (SImode, i),
22965 addr);
22966 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
22967 insn = emit_jump_insn (insn);
22968 }
22969 else
22970 {
22971 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
22972 addr));
22973 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22974 gen_rtx_REG (SImode, i),
22975 NULL_RTX);
22976 }
22977 }
22978 }
22979 else
22980 {
22981 arm_emit_multi_reg_pop (saved_regs_mask);
22982 }
22983
22984 if (return_in_pc == true)
22985 return;
22986 }
22987
22988 if (crtl->args.pretend_args_size)
22989 emit_insn (gen_addsi3 (stack_pointer_rtx,
22990 stack_pointer_rtx,
22991 GEN_INT (crtl->args.pretend_args_size)));
22992
22993 if (!really_return)
22994 return;
22995
22996 if (crtl->calls_eh_return)
22997 emit_insn (gen_addsi3 (stack_pointer_rtx,
22998 stack_pointer_rtx,
22999 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
23000
23001 if (IS_STACKALIGN (func_type))
23002 /* Restore the original stack pointer. Before prologue, the stack was
23003 realigned and the original stack pointer saved in r0. For details,
23004 see comment in arm_expand_prologue. */
23005 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
23006
23007 emit_jump_insn (simple_return_rtx);
23008 }
23009
23010 /* Implementation of insn prologue_thumb1_interwork. This is the first
23011 "instruction" of a function called in ARM mode. Swap to thumb mode. */
23012
23013 const char *
23014 thumb1_output_interwork (void)
23015 {
23016 const char * name;
23017 FILE *f = asm_out_file;
23018
23019 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
23020 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
23021 == SYMBOL_REF);
23022 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
23023
23024 /* Generate code sequence to switch us into Thumb mode. */
23025 /* The .code 32 directive has already been emitted by
23026 ASM_DECLARE_FUNCTION_NAME. */
23027 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
23028 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
23029
23030 /* Generate a label, so that the debugger will notice the
23031 change in instruction sets. This label is also used by
23032 the assembler to bypass the ARM code when this function
23033 is called from a Thumb encoded function elsewhere in the
23034 same file. Hence the definition of STUB_NAME here must
23035 agree with the definition in gas/config/tc-arm.c. */
23036
23037 #define STUB_NAME ".real_start_of"
23038
23039 fprintf (f, "\t.code\t16\n");
23040 #ifdef ARM_PE
23041 if (arm_dllexport_name_p (name))
23042 name = arm_strip_name_encoding (name);
23043 #endif
23044 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
23045 fprintf (f, "\t.thumb_func\n");
23046 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
23047
23048 return "";
23049 }
23050
23051 /* Handle the case of a double word load into a low register from
23052 a computed memory address. The computed address may involve a
23053 register which is overwritten by the load. */
23054 const char *
23055 thumb_load_double_from_address (rtx *operands)
23056 {
23057 rtx addr;
23058 rtx base;
23059 rtx offset;
23060 rtx arg1;
23061 rtx arg2;
23062
23063 gcc_assert (REG_P (operands[0]));
23064 gcc_assert (MEM_P (operands[1]));
23065
23066 /* Get the memory address. */
23067 addr = XEXP (operands[1], 0);
23068
23069 /* Work out how the memory address is computed. */
23070 switch (GET_CODE (addr))
23071 {
23072 case REG:
23073 operands[2] = adjust_address (operands[1], SImode, 4);
23074
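/* If the destination register overlaps the address register, load the
   high word first so that the base address is not clobbered before the
   second load; otherwise load the low word first.  */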
23075 if (REGNO (operands[0]) == REGNO (addr))
23076 {
23077 output_asm_insn ("ldr\t%H0, %2", operands);
23078 output_asm_insn ("ldr\t%0, %1", operands);
23079 }
23080 else
23081 {
23082 output_asm_insn ("ldr\t%0, %1", operands);
23083 output_asm_insn ("ldr\t%H0, %2", operands);
23084 }
23085 break;
23086
23087 case CONST:
23088 /* Compute <address> + 4 for the high order load. */
23089 operands[2] = adjust_address (operands[1], SImode, 4);
23090
23091 output_asm_insn ("ldr\t%0, %1", operands);
23092 output_asm_insn ("ldr\t%H0, %2", operands);
23093 break;
23094
23095 case PLUS:
23096 arg1 = XEXP (addr, 0);
23097 arg2 = XEXP (addr, 1);
23098
23099 if (CONSTANT_P (arg1))
23100 base = arg2, offset = arg1;
23101 else
23102 base = arg1, offset = arg2;
23103
23104 gcc_assert (REG_P (base));
23105
23106 /* Catch the case of <address> = <reg> + <reg> */
23107 if (REG_P (offset))
23108 {
23109 int reg_offset = REGNO (offset);
23110 int reg_base = REGNO (base);
23111 int reg_dest = REGNO (operands[0]);
23112
23113 /* Add the base and offset registers together into the
23114 higher destination register. */
23115 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
23116 reg_dest + 1, reg_base, reg_offset);
23117
23118 /* Load the lower destination register from the address in
23119 the higher destination register. */
23120 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
23121 reg_dest, reg_dest + 1);
23122
23123 /* Load the higher destination register from its own address
23124 plus 4. */
23125 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
23126 reg_dest + 1, reg_dest + 1);
23127 }
23128 else
23129 {
23130 /* Compute <address> + 4 for the high order load. */
23131 operands[2] = adjust_address (operands[1], SImode, 4);
23132
23133 /* If the computed address is held in the low order register
23134 then load the high order register first, otherwise always
23135 load the low order register first. */
23136 if (REGNO (operands[0]) == REGNO (base))
23137 {
23138 output_asm_insn ("ldr\t%H0, %2", operands);
23139 output_asm_insn ("ldr\t%0, %1", operands);
23140 }
23141 else
23142 {
23143 output_asm_insn ("ldr\t%0, %1", operands);
23144 output_asm_insn ("ldr\t%H0, %2", operands);
23145 }
23146 }
23147 break;
23148
23149 case LABEL_REF:
23150 /* With no registers to worry about we can just load the value
23151 directly. */
23152 operands[2] = adjust_address (operands[1], SImode, 4);
23153
23154 output_asm_insn ("ldr\t%H0, %2", operands);
23155 output_asm_insn ("ldr\t%0, %1", operands);
23156 break;
23157
23158 default:
23159 gcc_unreachable ();
23160 }
23161
23162 return "";
23163 }
23164
23165 const char *
23166 thumb_output_move_mem_multiple (int n, rtx *operands)
23167 {
23168 rtx tmp;
23169
23170 switch (n)
23171 {
23172 case 2:
23173 if (REGNO (operands[4]) > REGNO (operands[5]))
23174 {
23175 tmp = operands[4];
23176 operands[4] = operands[5];
23177 operands[5] = tmp;
23178 }
23179 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
23180 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
23181 break;
23182
23183 case 3:
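/* Sort the three transfer registers into ascending order, since the
   register lists of ldmia/stmia must name registers in ascending order.  */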
23184 if (REGNO (operands[4]) > REGNO (operands[5]))
23185 {
23186 tmp = operands[4];
23187 operands[4] = operands[5];
23188 operands[5] = tmp;
23189 }
23190 if (REGNO (operands[5]) > REGNO (operands[6]))
23191 {
23192 tmp = operands[5];
23193 operands[5] = operands[6];
23194 operands[6] = tmp;
23195 }
23196 if (REGNO (operands[4]) > REGNO (operands[5]))
23197 {
23198 tmp = operands[4];
23199 operands[4] = operands[5];
23200 operands[5] = tmp;
23201 }
23202
23203 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
23204 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
23205 break;
23206
23207 default:
23208 gcc_unreachable ();
23209 }
23210
23211 return "";
23212 }
23213
23214 /* Output a call-via instruction for thumb state. */
23215 const char *
23216 thumb_call_via_reg (rtx reg)
23217 {
23218 int regno = REGNO (reg);
23219 rtx *labelp;
23220
23221 gcc_assert (regno < LR_REGNUM);
23222
23223 /* If we are in the normal text section we can use a single instance
23224 per compilation unit. If we are doing function sections, then we need
23225 an entry per section, since we can't rely on reachability. */
23226 if (in_section == text_section)
23227 {
23228 thumb_call_reg_needed = 1;
23229
23230 if (thumb_call_via_label[regno] == NULL)
23231 thumb_call_via_label[regno] = gen_label_rtx ();
23232 labelp = thumb_call_via_label + regno;
23233 }
23234 else
23235 {
23236 if (cfun->machine->call_via[regno] == NULL)
23237 cfun->machine->call_via[regno] = gen_label_rtx ();
23238 labelp = cfun->machine->call_via + regno;
23239 }
23240
23241 output_asm_insn ("bl\t%a0", labelp);
23242 return "";
23243 }
23244
23245 /* Routines for generating rtl. */
23246 void
23247 thumb_expand_movmemqi (rtx *operands)
23248 {
23249 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
23250 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
23251 HOST_WIDE_INT len = INTVAL (operands[2]);
23252 HOST_WIDE_INT offset = 0;
23253
23254 while (len >= 12)
23255 {
23256 emit_insn (gen_movmem12b (out, in, out, in));
23257 len -= 12;
23258 }
23259
23260 if (len >= 8)
23261 {
23262 emit_insn (gen_movmem8b (out, in, out, in));
23263 len -= 8;
23264 }
23265
23266 if (len >= 4)
23267 {
23268 rtx reg = gen_reg_rtx (SImode);
23269 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
23270 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
23271 len -= 4;
23272 offset += 4;
23273 }
23274
23275 if (len >= 2)
23276 {
23277 rtx reg = gen_reg_rtx (HImode);
23278 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
23279 plus_constant (Pmode, in,
23280 offset))));
23281 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
23282 offset)),
23283 reg));
23284 len -= 2;
23285 offset += 2;
23286 }
23287
23288 if (len)
23289 {
23290 rtx reg = gen_reg_rtx (QImode);
23291 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
23292 plus_constant (Pmode, in,
23293 offset))));
23294 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
23295 offset)),
23296 reg));
23297 }
23298 }
23299
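/* Handle storing a half-word to memory during reload by expanding the
   store through the thumb_movhi_clobber pattern.  */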
23300 void
23301 thumb_reload_out_hi (rtx *operands)
23302 {
23303 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
23304 }
23305
23306 /* Handle reading a half-word from memory during reload. */
23307 void
23308 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
23309 {
23310 gcc_unreachable ();
23311 }
23312
23313 /* Return the length of a function name prefix
23314 that starts with the character 'c'. */
23315 static int
23316 arm_get_strip_length (int c)
23317 {
23318 switch (c)
23319 {
23320 ARM_NAME_ENCODING_LENGTHS
23321 default: return 0;
23322 }
23323 }
23324
23325 /* Return a pointer to a function's name with any
23326 and all prefix encodings stripped from it. */
23327 const char *
23328 arm_strip_name_encoding (const char *name)
23329 {
23330 int skip;
23331
23332 while ((skip = arm_get_strip_length (* name)))
23333 name += skip;
23334
23335 return name;
23336 }
23337
23338 /* If there is a '*' anywhere in the name's prefix, then
23339 emit the stripped name verbatim, otherwise prepend an
23340 underscore if leading underscores are being used. */
23341 void
23342 arm_asm_output_labelref (FILE *stream, const char *name)
23343 {
23344 int skip;
23345 int verbatim = 0;
23346
23347 while ((skip = arm_get_strip_length (* name)))
23348 {
23349 verbatim |= (*name == '*');
23350 name += skip;
23351 }
23352
23353 if (verbatim)
23354 fputs (name, stream);
23355 else
23356 asm_fprintf (stream, "%U%s", name);
23357 }
23358
23359 /* This function is used to emit an EABI tag and its associated value.
23360 We emit the numerical value of the tag in case the assembler does not
23361 support textual tags (e.g. gas prior to 2.20).  If requested we include
23362 the tag name in a comment so that anyone reading the assembler output
23363 will know which tag is being set.
23364
23365 This function is not static because arm-c.c needs it too. */
23366
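/* For example, arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1)
   emits ".eabi_attribute 20, 1", followed under -fverbose-asm by a
   comment naming the tag.  */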
23367 void
23368 arm_emit_eabi_attribute (const char *name, int num, int val)
23369 {
23370 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
23371 if (flag_verbose_asm || flag_debug_asm)
23372 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
23373 asm_fprintf (asm_out_file, "\n");
23374 }
23375
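/* Output the directives that open the assembly file: ".syntax unified"
   when the unified assembler syntax is in use and, for BPABI targets,
   the .arch/.cpu and .fpu selections plus the EABI build attributes.  */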
23376 static void
23377 arm_file_start (void)
23378 {
23379 int val;
23380
23381 if (TARGET_UNIFIED_ASM)
23382 asm_fprintf (asm_out_file, "\t.syntax unified\n");
23383
23384 if (TARGET_BPABI)
23385 {
23386 const char *fpu_name;
23387 if (arm_selected_arch)
23388 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
23389 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
23390 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
23391 else
23392 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
23393
23394 if (TARGET_SOFT_FLOAT)
23395 {
23396 fpu_name = "softvfp";
23397 }
23398 else
23399 {
23400 fpu_name = arm_fpu_desc->name;
23401 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
23402 {
23403 if (TARGET_HARD_FLOAT)
23404 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
23405 if (TARGET_HARD_FLOAT_ABI)
23406 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
23407 }
23408 }
23409 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
23410
23411 /* Some of these attributes only apply when the corresponding features
23412 are used. However we don't have any easy way of figuring this out.
23413 Conservatively record the setting that would have been used. */
23414
23415 if (flag_rounding_math)
23416 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
23417
23418 if (!flag_unsafe_math_optimizations)
23419 {
23420 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
23421 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
23422 }
23423 if (flag_signaling_nans)
23424 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
23425
23426 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
23427 flag_finite_math_only ? 1 : 3);
23428
23429 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
23430 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
23431 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
23432 flag_short_enums ? 1 : 2);
23433
23434 /* Tag_ABI_optimization_goals. */
23435 if (optimize_size)
23436 val = 4;
23437 else if (optimize >= 2)
23438 val = 2;
23439 else if (optimize)
23440 val = 1;
23441 else
23442 val = 6;
23443 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
23444
23445 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
23446 unaligned_access);
23447
23448 if (arm_fp16_format)
23449 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
23450 (int) arm_fp16_format);
23451
23452 if (arm_lang_output_object_attributes_hook)
23453 arm_lang_output_object_attributes_hook();
23454 }
23455
23456 default_file_start ();
23457 }
23458
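/* Output the end-of-file directives: add a .note.GNU-stack marker when
   required and, if any Thumb call-via-register labels were used, emit
   the corresponding "bx <reg>" stubs in the text section.  */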
23459 static void
23460 arm_file_end (void)
23461 {
23462 int regno;
23463
23464 if (NEED_INDICATE_EXEC_STACK)
23465 /* Add .note.GNU-stack. */
23466 file_end_indicate_exec_stack ();
23467
23468 if (! thumb_call_reg_needed)
23469 return;
23470
23471 switch_to_section (text_section);
23472 asm_fprintf (asm_out_file, "\t.code 16\n");
23473 ASM_OUTPUT_ALIGN (asm_out_file, 1);
23474
23475 for (regno = 0; regno < LR_REGNUM; regno++)
23476 {
23477 rtx label = thumb_call_via_label[regno];
23478
23479 if (label != 0)
23480 {
23481 targetm.asm_out.internal_label (asm_out_file, "L",
23482 CODE_LABEL_NUMBER (label));
23483 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
23484 }
23485 }
23486 }
23487
23488 #ifndef ARM_PE
23489 /* Symbols in the text segment can be accessed without indirecting via the
23490 constant pool; it may take an extra binary operation, but this is still
23491 faster than indirecting via memory. Don't do this when not optimizing,
23492 since we won't be calculating all of the offsets necessary to do this
23493 simplification. */
23494
23495 static void
23496 arm_encode_section_info (tree decl, rtx rtl, int first)
23497 {
23498 if (optimize > 0 && TREE_CONSTANT (decl))
23499 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
23500
23501 default_encode_section_info (decl, rtl, first);
23502 }
23503 #endif /* !ARM_PE */
23504
23505 static void
23506 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
23507 {
23508 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
23509 && !strcmp (prefix, "L"))
23510 {
23511 arm_ccfsm_state = 0;
23512 arm_target_insn = NULL;
23513 }
23514 default_internal_label (stream, prefix, labelno);
23515 }
23516
23517 /* Output code to add DELTA to the first argument, and then jump
23518 to FUNCTION. Used for C++ multiple inheritance. */
23519 static void
23520 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
23521 HOST_WIDE_INT delta,
23522 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
23523 tree function)
23524 {
23525 static int thunk_label = 0;
23526 char label[256];
23527 char labelpc[256];
23528 int mi_delta = delta;
23529 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
23530 int shift = 0;
23531 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
23532 ? 1 : 0);
23533 if (mi_delta < 0)
23534 mi_delta = - mi_delta;
23535
23536 if (TARGET_THUMB1)
23537 {
23538 int labelno = thunk_label++;
23539 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
23540 /* Thunks are entered in ARM mode when available. */
23541 if (TARGET_THUMB1_ONLY)
23542 {
23543 /* push r3 so we can use it as a temporary. */
23544 /* TODO: Omit this save if r3 is not used. */
23545 fputs ("\tpush {r3}\n", file);
23546 fputs ("\tldr\tr3, ", file);
23547 }
23548 else
23549 {
23550 fputs ("\tldr\tr12, ", file);
23551 }
23552 assemble_name (file, label);
23553 fputc ('\n', file);
23554 if (flag_pic)
23555 {
23556 /* If we are generating PIC, the ldr instruction below loads
23557 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
23558 the address of the add + 8, so we have:
23559
23560 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
23561 = target + 1.
23562
23563 Note that we have "+ 1" because some versions of GNU ld
23564 don't set the low bit of the result for R_ARM_REL32
23565 relocations against thumb function symbols.
23566 On ARMv6M this is +4, not +8. */
23567 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
23568 assemble_name (file, labelpc);
23569 fputs (":\n", file);
23570 if (TARGET_THUMB1_ONLY)
23571 {
23572 /* This is 2 insns after the start of the thunk, so we know it
23573 is 4-byte aligned. */
23574 fputs ("\tadd\tr3, pc, r3\n", file);
23575 fputs ("\tmov r12, r3\n", file);
23576 }
23577 else
23578 fputs ("\tadd\tr12, pc, r12\n", file);
23579 }
23580 else if (TARGET_THUMB1_ONLY)
23581 fputs ("\tmov r12, r3\n", file);
23582 }
23583 if (TARGET_THUMB1_ONLY)
23584 {
23585 if (mi_delta > 255)
23586 {
23587 fputs ("\tldr\tr3, ", file);
23588 assemble_name (file, label);
23589 fputs ("+4\n", file);
23590 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
23591 mi_op, this_regno, this_regno);
23592 }
23593 else if (mi_delta != 0)
23594 {
23595 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23596 mi_op, this_regno, this_regno,
23597 mi_delta);
23598 }
23599 }
23600 else
23601 {
23602 /* TODO: Use movw/movt for large constants when available. */
23603 while (mi_delta != 0)
23604 {
23605 if ((mi_delta & (3 << shift)) == 0)
23606 shift += 2;
23607 else
23608 {
23609 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23610 mi_op, this_regno, this_regno,
23611 mi_delta & (0xff << shift));
23612 mi_delta &= ~(0xff << shift);
23613 shift += 8;
23614 }
23615 }
23616 }
23617 if (TARGET_THUMB1)
23618 {
23619 if (TARGET_THUMB1_ONLY)
23620 fputs ("\tpop\t{r3}\n", file);
23621
23622 fprintf (file, "\tbx\tr12\n");
23623 ASM_OUTPUT_ALIGN (file, 2);
23624 assemble_name (file, label);
23625 fputs (":\n", file);
23626 if (flag_pic)
23627 {
23628 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
23629 rtx tem = XEXP (DECL_RTL (function), 0);
23630 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
23631 tem = gen_rtx_MINUS (GET_MODE (tem),
23632 tem,
23633 gen_rtx_SYMBOL_REF (Pmode,
23634 ggc_strdup (labelpc)));
23635 assemble_integer (tem, 4, BITS_PER_WORD, 1);
23636 }
23637 else
23638 /* Output ".word .LTHUNKn". */
23639 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
23640
23641 if (TARGET_THUMB1_ONLY && mi_delta > 255)
23642 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
23643 }
23644 else
23645 {
23646 fputs ("\tb\t", file);
23647 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
23648 if (NEED_PLT_RELOC)
23649 fputs ("(PLT)", file);
23650 fputc ('\n', file);
23651 }
23652 }
23653
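/* Output the constant vector X as a single hexadecimal literal, with a
   field width per element chosen from the vector mode; the elements are
   printed from the highest-numbered element down to element 0.
   Returns 1.  */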
23654 int
23655 arm_emit_vector_const (FILE *file, rtx x)
23656 {
23657 int i;
23658 const char * pattern;
23659
23660 gcc_assert (GET_CODE (x) == CONST_VECTOR);
23661
23662 switch (GET_MODE (x))
23663 {
23664 case V2SImode: pattern = "%08x"; break;
23665 case V4HImode: pattern = "%04x"; break;
23666 case V8QImode: pattern = "%02x"; break;
23667 default: gcc_unreachable ();
23668 }
23669
23670 fprintf (file, "0x");
23671 for (i = CONST_VECTOR_NUNITS (x); i--;)
23672 {
23673 rtx element;
23674
23675 element = CONST_VECTOR_ELT (x, i);
23676 fprintf (file, pattern, INTVAL (element));
23677 }
23678
23679 return 1;
23680 }
23681
23682 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
23683 HFmode constant pool entries are actually loaded with ldr. */
23684 void
23685 arm_emit_fp16_const (rtx c)
23686 {
23687 REAL_VALUE_TYPE r;
23688 long bits;
23689
23690 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
23691 bits = real_to_target (NULL, &r, HFmode);
23692 if (WORDS_BIG_ENDIAN)
23693 assemble_zeros (2);
23694 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
23695 if (!WORDS_BIG_ENDIAN)
23696 assemble_zeros (2);
23697 }
23698
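/* Output code to load an iWMMXt GR register from memory.  If the
   address is within range for wldrw, a single instruction suffices;
   otherwise a core register is temporarily spilled to the stack, the
   value is loaded through it and then transferred with tmcr.  */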
23699 const char *
23700 arm_output_load_gr (rtx *operands)
23701 {
23702 rtx reg;
23703 rtx offset;
23704 rtx wcgr;
23705 rtx sum;
23706
23707 if (!MEM_P (operands [1])
23708 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
23709 || !REG_P (reg = XEXP (sum, 0))
23710 || !CONST_INT_P (offset = XEXP (sum, 1))
23711 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
23712 return "wldrw%?\t%0, %1";
23713
23714 /* Fix up an out-of-range load of a GR register. */
23715 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
23716 wcgr = operands[0];
23717 operands[0] = reg;
23718 output_asm_insn ("ldr%?\t%0, %1", operands);
23719
23720 operands[0] = wcgr;
23721 operands[1] = reg;
23722 output_asm_insn ("tmcr%?\t%0, %1", operands);
23723 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
23724
23725 return "";
23726 }
23727
23728 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
23729
23730 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
23731 named arg and all anonymous args onto the stack.
23732 XXX I know the prologue shouldn't be pushing registers, but it is faster
23733 that way. */
23734
23735 static void
23736 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
23737 enum machine_mode mode,
23738 tree type,
23739 int *pretend_size,
23740 int second_time ATTRIBUTE_UNUSED)
23741 {
23742 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
23743 int nregs;
23744
23745 cfun->machine->uses_anonymous_args = 1;
23746 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
23747 {
23748 nregs = pcum->aapcs_ncrn;
23749 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
23750 nregs++;
23751 }
23752 else
23753 nregs = pcum->nregs;
23754
23755 if (nregs < NUM_ARG_REGS)
23756 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
23757 }
23758
23759 /* Return nonzero if the CONSUMER instruction (a store) does not need
23760 PRODUCER's value to calculate the address. */
23761
23762 int
23763 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
23764 {
23765 rtx value = PATTERN (producer);
23766 rtx addr = PATTERN (consumer);
23767
23768 if (GET_CODE (value) == COND_EXEC)
23769 value = COND_EXEC_CODE (value);
23770 if (GET_CODE (value) == PARALLEL)
23771 value = XVECEXP (value, 0, 0);
23772 value = XEXP (value, 0);
23773 if (GET_CODE (addr) == COND_EXEC)
23774 addr = COND_EXEC_CODE (addr);
23775 if (GET_CODE (addr) == PARALLEL)
23776 addr = XVECEXP (addr, 0, 0);
23777 addr = XEXP (addr, 0);
23778
23779 return !reg_overlap_mentioned_p (value, addr);
23780 }
23781
23782 /* Return nonzero if the CONSUMER instruction (a store) does need
23783 PRODUCER's value to calculate the address. */
23784
23785 int
23786 arm_early_store_addr_dep (rtx producer, rtx consumer)
23787 {
23788 return !arm_no_early_store_addr_dep (producer, consumer);
23789 }
23790
23791 /* Return nonzero if the CONSUMER instruction (a load) does need
23792 PRODUCER's value to calculate the address. */
23793
23794 int
23795 arm_early_load_addr_dep (rtx producer, rtx consumer)
23796 {
23797 rtx value = PATTERN (producer);
23798 rtx addr = PATTERN (consumer);
23799
23800 if (GET_CODE (value) == COND_EXEC)
23801 value = COND_EXEC_CODE (value);
23802 if (GET_CODE (value) == PARALLEL)
23803 value = XVECEXP (value, 0, 0);
23804 value = XEXP (value, 0);
23805 if (GET_CODE (addr) == COND_EXEC)
23806 addr = COND_EXEC_CODE (addr);
23807 if (GET_CODE (addr) == PARALLEL)
23808 {
23809 if (GET_CODE (XVECEXP (addr, 0, 0)) == RETURN)
23810 addr = XVECEXP (addr, 0, 1);
23811 else
23812 addr = XVECEXP (addr, 0, 0);
23813 }
23814 addr = XEXP (addr, 1);
23815
23816 return reg_overlap_mentioned_p (value, addr);
23817 }
23818
23819 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23820 have an early register shift value or amount dependency on the
23821 result of PRODUCER. */
23822
23823 int
23824 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
23825 {
23826 rtx value = PATTERN (producer);
23827 rtx op = PATTERN (consumer);
23828 rtx early_op;
23829
23830 if (GET_CODE (value) == COND_EXEC)
23831 value = COND_EXEC_CODE (value);
23832 if (GET_CODE (value) == PARALLEL)
23833 value = XVECEXP (value, 0, 0);
23834 value = XEXP (value, 0);
23835 if (GET_CODE (op) == COND_EXEC)
23836 op = COND_EXEC_CODE (op);
23837 if (GET_CODE (op) == PARALLEL)
23838 op = XVECEXP (op, 0, 0);
23839 op = XEXP (op, 1);
23840
23841 early_op = XEXP (op, 0);
23842 /* This is either an actual independent shift, or a shift applied to
23843 the first operand of another operation. We want the whole shift
23844 operation. */
23845 if (REG_P (early_op))
23846 early_op = op;
23847
23848 return !reg_overlap_mentioned_p (value, early_op);
23849 }
23850
23851 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23852 have an early register shift value dependency on the result of
23853 PRODUCER. */
23854
23855 int
23856 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
23857 {
23858 rtx value = PATTERN (producer);
23859 rtx op = PATTERN (consumer);
23860 rtx early_op;
23861
23862 if (GET_CODE (value) == COND_EXEC)
23863 value = COND_EXEC_CODE (value);
23864 if (GET_CODE (value) == PARALLEL)
23865 value = XVECEXP (value, 0, 0);
23866 value = XEXP (value, 0);
23867 if (GET_CODE (op) == COND_EXEC)
23868 op = COND_EXEC_CODE (op);
23869 if (GET_CODE (op) == PARALLEL)
23870 op = XVECEXP (op, 0, 0);
23871 op = XEXP (op, 1);
23872
23873 early_op = XEXP (op, 0);
23874
23875 /* This is either an actual independent shift, or a shift applied to
23876 the first operand of another operation. We want the value being
23877 shifted, in either case. */
23878 if (!REG_P (early_op))
23879 early_op = XEXP (early_op, 0);
23880
23881 return !reg_overlap_mentioned_p (value, early_op);
23882 }
23883
23884 /* Return nonzero if the CONSUMER (a mul or mac op) does not
23885 have an early register mult dependency on the result of
23886 PRODUCER. */
23887
23888 int
23889 arm_no_early_mul_dep (rtx producer, rtx consumer)
23890 {
23891 rtx value = PATTERN (producer);
23892 rtx op = PATTERN (consumer);
23893
23894 if (GET_CODE (value) == COND_EXEC)
23895 value = COND_EXEC_CODE (value);
23896 if (GET_CODE (value) == PARALLEL)
23897 value = XVECEXP (value, 0, 0);
23898 value = XEXP (value, 0);
23899 if (GET_CODE (op) == COND_EXEC)
23900 op = COND_EXEC_CODE (op);
23901 if (GET_CODE (op) == PARALLEL)
23902 op = XVECEXP (op, 0, 0);
23903 op = XEXP (op, 1);
23904
23905 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
23906 {
23907 if (GET_CODE (XEXP (op, 0)) == MULT)
23908 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
23909 else
23910 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
23911 }
23912
23913 return 0;
23914 }
23915
23916 /* We can't rely on the caller doing the proper promotion when
23917 using APCS or ATPCS. */
23918
23919 static bool
23920 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
23921 {
23922 return !TARGET_AAPCS_BASED;
23923 }
23924
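/* Promote integer arguments and return values narrower than a word to
   SImode; other modes are left unchanged.  */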
23925 static enum machine_mode
23926 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
23927 enum machine_mode mode,
23928 int *punsignedp ATTRIBUTE_UNUSED,
23929 const_tree fntype ATTRIBUTE_UNUSED,
23930 int for_return ATTRIBUTE_UNUSED)
23931 {
23932 if (GET_MODE_CLASS (mode) == MODE_INT
23933 && GET_MODE_SIZE (mode) < 4)
23934 return SImode;
23935
23936 return mode;
23937 }
23938
23939 /* AAPCS based ABIs use short enums by default. */
23940
23941 static bool
23942 arm_default_short_enums (void)
23943 {
23944 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
23945 }
23946
23947
23948 /* AAPCS requires that anonymous bitfields affect structure alignment. */
23949
23950 static bool
23951 arm_align_anon_bitfield (void)
23952 {
23953 return TARGET_AAPCS_BASED;
23954 }
23955
23956
23957 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
23958
23959 static tree
23960 arm_cxx_guard_type (void)
23961 {
23962 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
23963 }
23964
23965 /* Return non-zero if the consumer (a multiply-accumulate instruction)
23966 has an accumulator dependency on the result of the producer (a
23967 multiplication instruction) and no other dependency on that result. */
23968 int
23969 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
23970 {
23971 rtx mul = PATTERN (producer);
23972 rtx mac = PATTERN (consumer);
23973 rtx mul_result;
23974 rtx mac_op0, mac_op1, mac_acc;
23975
23976 if (GET_CODE (mul) == COND_EXEC)
23977 mul = COND_EXEC_CODE (mul);
23978 if (GET_CODE (mac) == COND_EXEC)
23979 mac = COND_EXEC_CODE (mac);
23980
23981 /* Check that mul is of the form (set (...) (mult ...))
23982 and mla is of the form (set (...) (plus (mult ...) (...))). */
23983 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
23984 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
23985 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
23986 return 0;
23987
23988 mul_result = XEXP (mul, 0);
23989 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
23990 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
23991 mac_acc = XEXP (XEXP (mac, 1), 1);
23992
23993 return (reg_overlap_mentioned_p (mul_result, mac_acc)
23994 && !reg_overlap_mentioned_p (mul_result, mac_op0)
23995 && !reg_overlap_mentioned_p (mul_result, mac_op1));
23996 }
23997
23998
23999 /* The EABI says test the least significant bit of a guard variable. */
24000
24001 static bool
24002 arm_cxx_guard_mask_bit (void)
24003 {
24004 return TARGET_AAPCS_BASED;
24005 }
24006
24007
24008 /* The EABI specifies that all array cookies are 8 bytes long. */
24009
24010 static tree
24011 arm_get_cookie_size (tree type)
24012 {
24013 tree size;
24014
24015 if (!TARGET_AAPCS_BASED)
24016 return default_cxx_get_cookie_size (type);
24017
24018 size = build_int_cst (sizetype, 8);
24019 return size;
24020 }
24021
24022
24023 /* The EABI says that array cookies should also contain the element size. */
24024
24025 static bool
24026 arm_cookie_has_size (void)
24027 {
24028 return TARGET_AAPCS_BASED;
24029 }
24030
24031
24032 /* The EABI says constructors and destructors should return a pointer to
24033 the object constructed/destroyed. */
24034
24035 static bool
24036 arm_cxx_cdtor_returns_this (void)
24037 {
24038 return TARGET_AAPCS_BASED;
24039 }
24040
24041 /* The EABI says that an inline function may never be the key
24042 method. */
24043
24044 static bool
24045 arm_cxx_key_method_may_be_inline (void)
24046 {
24047 return !TARGET_AAPCS_BASED;
24048 }
24049
24050 static void
24051 arm_cxx_determine_class_data_visibility (tree decl)
24052 {
24053 if (!TARGET_AAPCS_BASED
24054 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
24055 return;
24056
24057 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
24058 is exported. However, on systems without dynamic vague linkage,
24059 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
24060 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
24061 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
24062 else
24063 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
24064 DECL_VISIBILITY_SPECIFIED (decl) = 1;
24065 }
24066
24067 static bool
24068 arm_cxx_class_data_always_comdat (void)
24069 {
24070 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
24071 vague linkage if the class has no key function. */
24072 return !TARGET_AAPCS_BASED;
24073 }
24074
24075
24076 /* The EABI says __aeabi_atexit should be used to register static
24077 destructors. */
24078
24079 static bool
24080 arm_cxx_use_aeabi_atexit (void)
24081 {
24082 return TARGET_AAPCS_BASED;
24083 }
24084
24085
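/* Store SOURCE as the return address of the current function: directly
   in LR if LR was not saved, otherwise in the stack slot where LR was
   stored.  SCRATCH may be used to form an address that is out of
   immediate range.  */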
24086 void
24087 arm_set_return_address (rtx source, rtx scratch)
24088 {
24089 arm_stack_offsets *offsets;
24090 HOST_WIDE_INT delta;
24091 rtx addr;
24092 unsigned long saved_regs;
24093
24094 offsets = arm_get_frame_offsets ();
24095 saved_regs = offsets->saved_regs_mask;
24096
24097 if ((saved_regs & (1 << LR_REGNUM)) == 0)
24098 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
24099 else
24100 {
24101 if (frame_pointer_needed)
24102 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
24103 else
24104 {
24105 /* LR will be the first saved register. */
24106 delta = offsets->outgoing_args - (offsets->frame + 4);
24107
24108
24109 if (delta >= 4096)
24110 {
24111 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
24112 GEN_INT (delta & ~4095)));
24113 addr = scratch;
24114 delta &= 4095;
24115 }
24116 else
24117 addr = stack_pointer_rtx;
24118
24119 addr = plus_constant (Pmode, addr, delta);
24120 }
24121 emit_move_insn (gen_frame_mem (Pmode, addr), source);
24122 }
24123 }
24124
24125
24126 void
24127 thumb_set_return_address (rtx source, rtx scratch)
24128 {
24129 arm_stack_offsets *offsets;
24130 HOST_WIDE_INT delta;
24131 HOST_WIDE_INT limit;
24132 int reg;
24133 rtx addr;
24134 unsigned long mask;
24135
24136 emit_use (source);
24137
24138 offsets = arm_get_frame_offsets ();
24139 mask = offsets->saved_regs_mask;
24140 if (mask & (1 << LR_REGNUM))
24141 {
24142 limit = 1024;
24143 /* Find the saved regs. */
24144 if (frame_pointer_needed)
24145 {
24146 delta = offsets->soft_frame - offsets->saved_args;
24147 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
24148 if (TARGET_THUMB1)
24149 limit = 128;
24150 }
24151 else
24152 {
24153 delta = offsets->outgoing_args - offsets->saved_args;
24154 reg = SP_REGNUM;
24155 }
24156 /* Allow for the stack frame. */
24157 if (TARGET_THUMB1 && TARGET_BACKTRACE)
24158 delta -= 16;
24159 /* The link register is always the first saved register. */
24160 delta -= 4;
24161
24162 /* Construct the address. */
24163 addr = gen_rtx_REG (SImode, reg);
24164 if (delta > limit)
24165 {
24166 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
24167 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
24168 addr = scratch;
24169 }
24170 else
24171 addr = plus_constant (Pmode, addr, delta);
24172
24173 emit_move_insn (gen_frame_mem (Pmode, addr), source);
24174 }
24175 else
24176 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
24177 }
24178
24179 /* Implements target hook vector_mode_supported_p. */
24180 bool
24181 arm_vector_mode_supported_p (enum machine_mode mode)
24182 {
24183 /* Neon also supports V2SImode, etc. listed in the clause below. */
24184 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
24185 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
24186 return true;
24187
24188 if ((TARGET_NEON || TARGET_IWMMXT)
24189 && ((mode == V2SImode)
24190 || (mode == V4HImode)
24191 || (mode == V8QImode)))
24192 return true;
24193
24194 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
24195 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
24196 || mode == V2HAmode))
24197 return true;
24198
24199 return false;
24200 }
24201
24202 /* Implements target hook array_mode_supported_p. */
24203
24204 static bool
24205 arm_array_mode_supported_p (enum machine_mode mode,
24206 unsigned HOST_WIDE_INT nelems)
24207 {
24208 if (TARGET_NEON
24209 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
24210 && (nelems >= 2 && nelems <= 4))
24211 return true;
24212
24213 return false;
24214 }
24215
24216 /* Use the option -mvectorize-with-neon-double to override the use of quadword
24217 registers when autovectorizing for Neon, at least until multiple vector
24218 widths are supported properly by the middle-end. */
24219
24220 static enum machine_mode
24221 arm_preferred_simd_mode (enum machine_mode mode)
24222 {
24223 if (TARGET_NEON)
24224 switch (mode)
24225 {
24226 case SFmode:
24227 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
24228 case SImode:
24229 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
24230 case HImode:
24231 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
24232 case QImode:
24233 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
24234 case DImode:
24235 if (!TARGET_NEON_VECTORIZE_DOUBLE)
24236 return V2DImode;
24237 break;
24238
24239 default:;
24240 }
24241
24242 if (TARGET_REALLY_IWMMXT)
24243 switch (mode)
24244 {
24245 case SImode:
24246 return V2SImode;
24247 case HImode:
24248 return V4HImode;
24249 case QImode:
24250 return V8QImode;
24251
24252 default:;
24253 }
24254
24255 return word_mode;
24256 }
24257
24258 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
24259
24260 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
24261 using r0-r4 for function arguments, r7 for the stack frame and don't have
24262 enough left over to do doubleword arithmetic. For Thumb-2 all the
24263 potentially problematic instructions accept high registers so this is not
24264 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
24265 that require many low registers. */
24266 static bool
24267 arm_class_likely_spilled_p (reg_class_t rclass)
24268 {
24269 if ((TARGET_THUMB1 && rclass == LO_REGS)
24270 || rclass == CC_REG)
24271 return true;
24272
24273 return false;
24274 }
24275
24276 /* Implements target hook small_register_classes_for_mode_p. */
24277 bool
24278 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
24279 {
24280 return TARGET_THUMB1;
24281 }
24282
24283 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
24284 ARM insns and therefore guarantee that the shift count is modulo 256.
24285 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
24286 guarantee no particular behavior for out-of-range counts. */
24287
24288 static unsigned HOST_WIDE_INT
24289 arm_shift_truncation_mask (enum machine_mode mode)
24290 {
24291 return mode == SImode ? 255 : 0;
24292 }
24293
24294
24295 /* Map internal gcc register numbers to DWARF2 register numbers. */
24296
24297 unsigned int
24298 arm_dbx_register_number (unsigned int regno)
24299 {
24300 if (regno < 16)
24301 return regno;
24302
24303 if (IS_VFP_REGNUM (regno))
24304 {
24305 /* See comment in arm_dwarf_register_span. */
24306 if (VFP_REGNO_OK_FOR_SINGLE (regno))
24307 return 64 + regno - FIRST_VFP_REGNUM;
24308 else
24309 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
24310 }
24311
24312 if (IS_IWMMXT_GR_REGNUM (regno))
24313 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
24314
24315 if (IS_IWMMXT_REGNUM (regno))
24316 return 112 + regno - FIRST_IWMMXT_REGNUM;
24317
24318 gcc_unreachable ();
24319 }
24320
24321 /* DWARF models VFPv3 registers as 32 64-bit registers.
24322 GCC models them as 64 32-bit registers, so we need to describe this to
24323 the DWARF generation code. Other registers can use the default. */
24324 static rtx
24325 arm_dwarf_register_span (rtx rtl)
24326 {
24327 unsigned regno;
24328 int nregs;
24329 int i;
24330 rtx p;
24331
24332 regno = REGNO (rtl);
24333 if (!IS_VFP_REGNUM (regno))
24334 return NULL_RTX;
24335
24336 /* XXX FIXME: The EABI defines two VFP register ranges:
24337 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
24338 256-287: D0-D31
24339 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
24340 corresponding D register. Until GDB supports this, we shall use the
24341 legacy encodings. We also use these encodings for D0-D15 for
24342 compatibility with older debuggers. */
24343 if (VFP_REGNO_OK_FOR_SINGLE (regno))
24344 return NULL_RTX;
24345
24346 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
24347 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
24348 regno = (regno - FIRST_VFP_REGNUM) / 2;
24349 for (i = 0; i < nregs; i++)
24350 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
24351
24352 return p;
24353 }
24354
24355 #if ARM_UNWIND_INFO
24356 /* Emit unwind directives for a store-multiple instruction or stack pointer
24357 push during alignment.
24358 These should only ever be generated by the function prologue code, so
24359 expect them to have a particular form. */
24360
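/* For instance, a prologue "push {r4, r5, lr}" is annotated here as
   ".save {r4, r5, lr}", while saves of VFP registers use ".vsave".  */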
24361 static void
24362 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
24363 {
24364 int i;
24365 HOST_WIDE_INT offset;
24366 HOST_WIDE_INT nregs;
24367 int reg_size;
24368 unsigned reg;
24369 unsigned lastreg;
24370 rtx e;
24371
24372 e = XVECEXP (p, 0, 0);
24373 if (GET_CODE (e) != SET)
24374 abort ();
24375
24376 /* First insn will adjust the stack pointer. */
24377 if (GET_CODE (e) != SET
24378 || !REG_P (XEXP (e, 0))
24379 || REGNO (XEXP (e, 0)) != SP_REGNUM
24380 || GET_CODE (XEXP (e, 1)) != PLUS)
24381 abort ();
24382
24383 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
24384 nregs = XVECLEN (p, 0) - 1;
24385
24386 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
24387 if (reg < 16)
24388 {
24389 /* The function prologue may also push pc; that push is not annotated,
24390 since pc is never restored.  We turn it into a stack pointer adjustment. */
24391 if (nregs * 4 == offset - 4)
24392 {
24393 fprintf (asm_out_file, "\t.pad #4\n");
24394 offset -= 4;
24395 }
24396 reg_size = 4;
24397 fprintf (asm_out_file, "\t.save {");
24398 }
24399 else if (IS_VFP_REGNUM (reg))
24400 {
24401 reg_size = 8;
24402 fprintf (asm_out_file, "\t.vsave {");
24403 }
24404 else
24405 /* Unknown register type. */
24406 abort ();
24407
24408 /* If the stack increment doesn't match the size of the saved registers,
24409 something has gone horribly wrong. */
24410 if (offset != nregs * reg_size)
24411 abort ();
24412
24413 offset = 0;
24414 lastreg = 0;
24415 /* The remaining insns will describe the stores. */
24416 for (i = 1; i <= nregs; i++)
24417 {
24418 /* Expect (set (mem <addr>) (reg)).
24419 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
24420 e = XVECEXP (p, 0, i);
24421 if (GET_CODE (e) != SET
24422 || !MEM_P (XEXP (e, 0))
24423 || !REG_P (XEXP (e, 1)))
24424 abort ();
24425
24426 reg = REGNO (XEXP (e, 1));
24427 if (reg < lastreg)
24428 abort ();
24429
24430 if (i != 1)
24431 fprintf (asm_out_file, ", ");
24432 /* We can't use %r for vfp because we need to use the
24433 double precision register names. */
24434 if (IS_VFP_REGNUM (reg))
24435 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
24436 else
24437 asm_fprintf (asm_out_file, "%r", reg);
24438
24439 #ifdef ENABLE_CHECKING
24440 /* Check that the addresses are consecutive. */
24441 e = XEXP (XEXP (e, 0), 0);
24442 if (GET_CODE (e) == PLUS)
24443 {
24444 offset += reg_size;
24445 if (!REG_P (XEXP (e, 0))
24446 || REGNO (XEXP (e, 0)) != SP_REGNUM
24447 || !CONST_INT_P (XEXP (e, 1))
24448 || offset != INTVAL (XEXP (e, 1)))
24449 abort ();
24450 }
24451 else if (i != 1
24452 || !REG_P (e)
24453 || REGNO (e) != SP_REGNUM)
24454 abort ();
24455 #endif
24456 }
24457 fprintf (asm_out_file, "}\n");
24458 }
24459
24460 /* Emit unwind directives for a SET. */
24461
24462 static void
24463 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
24464 {
24465 rtx e0;
24466 rtx e1;
24467 unsigned reg;
24468
24469 e0 = XEXP (p, 0);
24470 e1 = XEXP (p, 1);
24471 switch (GET_CODE (e0))
24472 {
24473 case MEM:
24474 /* Pushing a single register. */
24475 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
24476 || !REG_P (XEXP (XEXP (e0, 0), 0))
24477 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
24478 abort ();
24479
24480 asm_fprintf (asm_out_file, "\t.save ");
24481 if (IS_VFP_REGNUM (REGNO (e1)))
24482 asm_fprintf(asm_out_file, "{d%d}\n",
24483 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
24484 else
24485 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
24486 break;
24487
24488 case REG:
24489 if (REGNO (e0) == SP_REGNUM)
24490 {
24491 /* A stack increment. */
24492 if (GET_CODE (e1) != PLUS
24493 || !REG_P (XEXP (e1, 0))
24494 || REGNO (XEXP (e1, 0)) != SP_REGNUM
24495 || !CONST_INT_P (XEXP (e1, 1)))
24496 abort ();
24497
24498 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
24499 -INTVAL (XEXP (e1, 1)));
24500 }
24501 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
24502 {
24503 HOST_WIDE_INT offset;
24504
24505 if (GET_CODE (e1) == PLUS)
24506 {
24507 if (!REG_P (XEXP (e1, 0))
24508 || !CONST_INT_P (XEXP (e1, 1)))
24509 abort ();
24510 reg = REGNO (XEXP (e1, 0));
24511 offset = INTVAL (XEXP (e1, 1));
24512 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
24513 HARD_FRAME_POINTER_REGNUM, reg,
24514 offset);
24515 }
24516 else if (REG_P (e1))
24517 {
24518 reg = REGNO (e1);
24519 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
24520 HARD_FRAME_POINTER_REGNUM, reg);
24521 }
24522 else
24523 abort ();
24524 }
24525 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
24526 {
24527 /* Move from sp to reg. */
24528 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
24529 }
24530 else if (GET_CODE (e1) == PLUS
24531 && REG_P (XEXP (e1, 0))
24532 && REGNO (XEXP (e1, 0)) == SP_REGNUM
24533 && CONST_INT_P (XEXP (e1, 1)))
24534 {
24535 /* Set reg to offset from sp. */
24536 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
24537 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
24538 }
24539 else
24540 abort ();
24541 break;
24542
24543 default:
24544 abort ();
24545 }
24546 }
24547
24548
24549 /* Emit unwind directives for the given insn. */
24550
24551 static void
24552 arm_unwind_emit (FILE * asm_out_file, rtx insn)
24553 {
24554 rtx note, pat;
24555 bool handled_one = false;
24556
24557 if (arm_except_unwind_info (&global_options) != UI_TARGET)
24558 return;
24559
24560 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
24561 && (TREE_NOTHROW (current_function_decl)
24562 || crtl->all_throwers_are_sibcalls))
24563 return;
24564
24565 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
24566 return;
24567
24568 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
24569 {
24570 pat = XEXP (note, 0);
24571 switch (REG_NOTE_KIND (note))
24572 {
24573 case REG_FRAME_RELATED_EXPR:
24574 goto found;
24575
24576 case REG_CFA_REGISTER:
24577 if (pat == NULL)
24578 {
24579 pat = PATTERN (insn);
24580 if (GET_CODE (pat) == PARALLEL)
24581 pat = XVECEXP (pat, 0, 0);
24582 }
24583
24584 /* Only emitted for IS_STACKALIGN re-alignment. */
24585 {
24586 rtx dest, src;
24587 unsigned reg;
24588
24589 src = SET_SRC (pat);
24590 dest = SET_DEST (pat);
24591
24592 gcc_assert (src == stack_pointer_rtx);
24593 reg = REGNO (dest);
24594 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
24595 reg + 0x90, reg);
24596 }
24597 handled_one = true;
24598 break;
24599
24600 case REG_CFA_DEF_CFA:
24601 case REG_CFA_EXPRESSION:
24602 case REG_CFA_ADJUST_CFA:
24603 case REG_CFA_OFFSET:
24604 /* ??? Only handling here what we actually emit. */
24605 gcc_unreachable ();
24606
24607 default:
24608 break;
24609 }
24610 }
24611 if (handled_one)
24612 return;
24613 pat = PATTERN (insn);
24614 found:
24615
24616 switch (GET_CODE (pat))
24617 {
24618 case SET:
24619 arm_unwind_emit_set (asm_out_file, pat);
24620 break;
24621
24622 case SEQUENCE:
24623 /* Store multiple. */
24624 arm_unwind_emit_sequence (asm_out_file, pat);
24625 break;
24626
24627 default:
24628 abort();
24629 }
24630 }
24631
24632
24633 /* Output a reference from a function exception table to the type_info
24634 object X. The EABI specifies that the symbol should be relocated by
24635 an R_ARM_TARGET2 relocation. */
24636
24637 static bool
24638 arm_output_ttype (rtx x)
24639 {
24640 fputs ("\t.word\t", asm_out_file);
24641 output_addr_const (asm_out_file, x);
24642 /* Use special relocations for symbol references. */
24643 if (!CONST_INT_P (x))
24644 fputs ("(TARGET2)", asm_out_file);
24645 fputc ('\n', asm_out_file);
24646
24647 return TRUE;
24648 }
24649
24650 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
24651
24652 static void
24653 arm_asm_emit_except_personality (rtx personality)
24654 {
24655 fputs ("\t.personality\t", asm_out_file);
24656 output_addr_const (asm_out_file, personality);
24657 fputc ('\n', asm_out_file);
24658 }
24659
24660 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
24661
24662 static void
24663 arm_asm_init_sections (void)
24664 {
24665 exception_section = get_unnamed_section (0, output_section_asm_op,
24666 "\t.handlerdata");
24667 }
24668 #endif /* ARM_UNWIND_INFO */
24669
24670 /* Output unwind directives for the start/end of a function. */
24671
24672 void
24673 arm_output_fn_unwind (FILE * f, bool prologue)
24674 {
24675 if (arm_except_unwind_info (&global_options) != UI_TARGET)
24676 return;
24677
24678 if (prologue)
24679 fputs ("\t.fnstart\n", f);
24680 else
24681 {
24682 /* If this function will never be unwound, then mark it as such.
24683 The same condition is used in arm_unwind_emit to suppress
24684 the frame annotations. */
24685 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
24686 && (TREE_NOTHROW (current_function_decl)
24687 || crtl->all_throwers_are_sibcalls))
24688 fputs("\t.cantunwind\n", f);
24689
24690 fputs ("\t.fnend\n", f);
24691 }
24692 }
24693
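/* Output the operand of a TLS UNSPEC together with the relocation
   suffix (e.g. "(tlsgd)" or "(tpoff)") that selects the TLS access
   model, plus the PC-relative adjustment where one is required.  */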
24694 static bool
24695 arm_emit_tls_decoration (FILE *fp, rtx x)
24696 {
24697 enum tls_reloc reloc;
24698 rtx val;
24699
24700 val = XVECEXP (x, 0, 0);
24701 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
24702
24703 output_addr_const (fp, val);
24704
24705 switch (reloc)
24706 {
24707 case TLS_GD32:
24708 fputs ("(tlsgd)", fp);
24709 break;
24710 case TLS_LDM32:
24711 fputs ("(tlsldm)", fp);
24712 break;
24713 case TLS_LDO32:
24714 fputs ("(tlsldo)", fp);
24715 break;
24716 case TLS_IE32:
24717 fputs ("(gottpoff)", fp);
24718 break;
24719 case TLS_LE32:
24720 fputs ("(tpoff)", fp);
24721 break;
24722 case TLS_DESCSEQ:
24723 fputs ("(tlsdesc)", fp);
24724 break;
24725 default:
24726 gcc_unreachable ();
24727 }
24728
24729 switch (reloc)
24730 {
24731 case TLS_GD32:
24732 case TLS_LDM32:
24733 case TLS_IE32:
24734 case TLS_DESCSEQ:
24735 fputs (" + (. - ", fp);
24736 output_addr_const (fp, XVECEXP (x, 0, 2));
24737 /* For DESCSEQ the 3rd operand encodes thumbness, and is added rather than subtracted. */
24738 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
24739 output_addr_const (fp, XVECEXP (x, 0, 3));
24740 fputc (')', fp);
24741 break;
24742 default:
24743 break;
24744 }
24745
24746 return TRUE;
24747 }
24748
24749 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
24750
24751 static void
24752 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
24753 {
24754 gcc_assert (size == 4);
24755 fputs ("\t.word\t", file);
24756 output_addr_const (file, x);
24757 fputs ("(tlsldo)", file);
24758 }
24759
24760 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
24761
24762 static bool
24763 arm_output_addr_const_extra (FILE *fp, rtx x)
24764 {
24765 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
24766 return arm_emit_tls_decoration (fp, x);
24767 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
24768 {
24769 char label[256];
24770 int labelno = INTVAL (XVECEXP (x, 0, 0));
24771
24772 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
24773 assemble_name_raw (fp, label);
24774
24775 return TRUE;
24776 }
24777 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
24778 {
24779 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
24780 if (GOT_PCREL)
24781 fputs ("+.", fp);
24782 fputs ("-(", fp);
24783 output_addr_const (fp, XVECEXP (x, 0, 0));
24784 fputc (')', fp);
24785 return TRUE;
24786 }
24787 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
24788 {
24789 output_addr_const (fp, XVECEXP (x, 0, 0));
24790 if (GOT_PCREL)
24791 fputs ("+.", fp);
24792 fputs ("-(", fp);
24793 output_addr_const (fp, XVECEXP (x, 0, 1));
24794 fputc (')', fp);
24795 return TRUE;
24796 }
24797 else if (GET_CODE (x) == CONST_VECTOR)
24798 return arm_emit_vector_const (fp, x);
24799
24800 return FALSE;
24801 }
24802
24803 /* Output assembly for a shift instruction.
24804 SET_FLAGS determines how the instruction modifies the condition codes.
24805 0 - Do not set condition codes.
24806 1 - Set condition codes.
24807 2 - Use smallest instruction. */
24808 const char *
24809 arm_output_shift(rtx * operands, int set_flags)
24810 {
24811 char pattern[100];
24812 static const char flag_chars[3] = {'?', '.', '!'};
24813 const char *shift;
24814 HOST_WIDE_INT val;
24815 char c;
24816
24817 c = flag_chars[set_flags];
24818 if (TARGET_UNIFIED_ASM)
24819 {
24820 shift = shift_op(operands[3], &val);
24821 if (shift)
24822 {
24823 if (val != -1)
24824 operands[2] = GEN_INT(val);
24825 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
24826 }
24827 else
24828 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
24829 }
24830 else
24831 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
24832 output_asm_insn (pattern, operands);
24833 return "";
24834 }
24835
24836 /* Output assembly for a WMMX immediate shift instruction. */
24837 const char *
24838 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
24839 {
24840 int shift = INTVAL (operands[2]);
24841 char templ[50];
24842 enum machine_mode opmode = GET_MODE (operands[0]);
24843
24844 gcc_assert (shift >= 0);
24845
24846 /* Handle shift values out of range for the register versions: > 63
24847 (for the D qualifier), > 31 (W qualifier) or > 15 (H qualifier). */
24848 if (((opmode == V4HImode) && (shift > 15))
24849 || ((opmode == V2SImode) && (shift > 31))
24850 || ((opmode == DImode) && (shift > 63)))
24851 {
24852 if (wror_or_wsra)
24853 {
24854 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
24855 output_asm_insn (templ, operands);
24856 if (opmode == DImode)
24857 {
24858 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
24859 output_asm_insn (templ, operands);
24860 }
24861 }
24862 else
24863 {
24864 /* The destination register will contain all zeros. */
24865 sprintf (templ, "wzero\t%%0");
24866 output_asm_insn (templ, operands);
24867 }
24868 return "";
24869 }
24870
24871 if ((opmode == DImode) && (shift > 32))
24872 {
24873 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
24874 output_asm_insn (templ, operands);
24875 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
24876 output_asm_insn (templ, operands);
24877 }
24878 else
24879 {
24880 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
24881 output_asm_insn (templ, operands);
24882 }
24883 return "";
24884 }
24885
24886 /* Output assembly for a WMMX tinsr instruction. */
24887 const char *
24888 arm_output_iwmmxt_tinsr (rtx *operands)
24889 {
24890 int mask = INTVAL (operands[3]);
24891 int i;
24892 char templ[50];
24893 int units = mode_nunits[GET_MODE (operands[0])];
24894 gcc_assert ((mask & (mask - 1)) == 0);
24895 for (i = 0; i < units; ++i)
24896 {
24897 if ((mask & 0x01) == 1)
24898 {
24899 break;
24900 }
24901 mask >>= 1;
24902 }
24903 gcc_assert (i < units);
24904 {
24905 switch (GET_MODE (operands[0]))
24906 {
24907 case V8QImode:
24908 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
24909 break;
24910 case V4HImode:
24911 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
24912 break;
24913 case V2SImode:
24914 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
24915 break;
24916 default:
24917 gcc_unreachable ();
24918 break;
24919 }
24920 output_asm_insn (templ, operands);
24921 }
24922 return "";
24923 }
24924
24925 /* Output a Thumb-1 casesi dispatch sequence. */
24926 const char *
24927 thumb1_output_casesi (rtx *operands)
24928 {
24929 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
24930
24931 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24932
24933 switch (GET_MODE(diff_vec))
24934 {
24935 case QImode:
24936 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24937 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
24938 case HImode:
24939 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24940 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
24941 case SImode:
24942 return "bl\t%___gnu_thumb1_case_si";
24943 default:
24944 gcc_unreachable ();
24945 }
24946 }
24947
24948 /* Output a Thumb-2 casesi instruction. */
24949 const char *
24950 thumb2_output_casesi (rtx *operands)
24951 {
24952 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
24953
24954 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24955
24956 output_asm_insn ("cmp\t%0, %1", operands);
24957 output_asm_insn ("bhi\t%l3", operands);
24958 switch (GET_MODE(diff_vec))
24959 {
24960 case QImode:
24961 return "tbb\t[%|pc, %0]";
24962 case HImode:
24963 return "tbh\t[%|pc, %0, lsl #1]";
24964 case SImode:
24965 if (flag_pic)
24966 {
24967 output_asm_insn ("adr\t%4, %l2", operands);
24968 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
24969 output_asm_insn ("add\t%4, %4, %5", operands);
24970 return "bx\t%4";
24971 }
24972 else
24973 {
24974 output_asm_insn ("adr\t%4, %l2", operands);
24975 return "ldr\t%|pc, [%4, %0, lsl #2]";
24976 }
24977 default:
24978 gcc_unreachable ();
24979 }
24980 }
24981
24982 /* Most ARM cores are single issue, but some newer ones can issue two or
24983 three per cycle.  The scheduler descriptions rely on this being correct. */
24984 static int
24985 arm_issue_rate (void)
24986 {
24987 switch (arm_tune)
24988 {
24989 case cortexa15:
24990 return 3;
24991
24992 case cortexr4:
24993 case cortexr4f:
24994 case cortexr5:
24995 case genericv7a:
24996 case cortexa5:
24997 case cortexa8:
24998 case cortexa9:
24999 case fa726te:
25000 return 2;
25001
25002 default:
25003 return 1;
25004 }
25005 }
25006
25007 /* A table and a function to perform ARM-specific name mangling for
25008 NEON vector types in order to conform to the AAPCS (see "Procedure
25009 Call Standard for the ARM Architecture", Appendix A). To qualify
25010 for emission with the mangled names defined in that document, a
25011 vector type must not only be of the correct mode but also be
25012 composed of NEON vector element types (e.g. __builtin_neon_qi). */
25013 typedef struct
25014 {
25015 enum machine_mode mode;
25016 const char *element_type_name;
25017 const char *aapcs_name;
25018 } arm_mangle_map_entry;
25019
25020 static arm_mangle_map_entry arm_mangle_map[] = {
25021 /* 64-bit containerized types. */
25022 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
25023 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
25024 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
25025 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
25026 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
25027 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
25028 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
25029 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
25030 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
25031 /* 128-bit containerized types. */
25032 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
25033 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
25034 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
25035 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
25036 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
25037 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
25038 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
25039 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
25040 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
25041 { VOIDmode, NULL, NULL }
25042 };
25043
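/* For example, a 64-bit vector whose elements are __builtin_neon_qi
   (V8QImode) is mangled as "15__simd64_int8_t" per the table above.  */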
25044 const char *
25045 arm_mangle_type (const_tree type)
25046 {
25047 arm_mangle_map_entry *pos = arm_mangle_map;
25048
25049 /* The ARM ABI documents (10th October 2008) say that "__va_list"
25050 has to be mangled as if it is in the "std" namespace. */
25051 if (TARGET_AAPCS_BASED
25052 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
25053 {
25054 static bool warned;
25055 if (!warned && warn_psabi && !in_system_header)
25056 {
25057 warned = true;
25058 inform (input_location,
25059 "the mangling of %<va_list%> has changed in GCC 4.4");
25060 }
25061 return "St9__va_list";
25062 }
25063
25064 /* Half-precision float. */
25065 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
25066 return "Dh";
25067
25068 if (TREE_CODE (type) != VECTOR_TYPE)
25069 return NULL;
25070
25071 /* Check the mode of the vector type, and the name of the vector
25072 element type, against the table. */
25073 while (pos->mode != VOIDmode)
25074 {
25075 tree elt_type = TREE_TYPE (type);
25076
25077 if (pos->mode == TYPE_MODE (type)
25078 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
25079 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
25080 pos->element_type_name))
25081 return pos->aapcs_name;
25082
25083 pos++;
25084 }
25085
25086 /* Use the default mangling for unrecognized (possibly user-defined)
25087 vector types. */
25088 return NULL;
25089 }
25090
25091 /* Order of allocation of core registers for Thumb: this allocation is
25092 written over the corresponding initial entries of the array
25093 initialized with REG_ALLOC_ORDER. We allocate all low registers
25094 first. Saving and restoring a low register is usually cheaper than
25095 using a call-clobbered high register. */
25096
25097 static const int thumb_core_reg_alloc_order[] =
25098 {
25099 3, 2, 1, 0, 4, 5, 6, 7,
25100 14, 12, 8, 9, 10, 11
25101 };
25102
25103 /* Adjust register allocation order when compiling for Thumb. */
25104
25105 void
25106 arm_order_regs_for_local_alloc (void)
25107 {
25108 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
25109 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
25110 if (TARGET_THUMB)
25111 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
25112 sizeof (thumb_core_reg_alloc_order));
25113 }
25114
25115 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
25116
25117 bool
25118 arm_frame_pointer_required (void)
25119 {
25120 return (cfun->has_nonlocal_label
25121 || SUBTARGET_FRAME_POINTER_REQUIRED
25122 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
25123 }
25124
25125 /* Only Thumb-1 lacks conditional execution, so return true unless
25126 the target is Thumb-1. */
25127 static bool
25128 arm_have_conditional_execution (void)
25129 {
25130 return !TARGET_THUMB1;
25131 }
25132
25133 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
25134 static HOST_WIDE_INT
25135 arm_vector_alignment (const_tree type)
25136 {
25137 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
25138
25139 if (TARGET_AAPCS_BASED)
25140 align = MIN (align, 64);
25141
25142 return align;
25143 }
25144
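/* Advertise both 16-byte and 8-byte vector sizes to the
   auto-vectorizer, unless double-word-only Neon vectorization was
   requested, in which case only the preferred SIMD mode is tried.  */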
25145 static unsigned int
25146 arm_autovectorize_vector_sizes (void)
25147 {
25148 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
25149 }
25150
25151 static bool
25152 arm_vector_alignment_reachable (const_tree type, bool is_packed)
25153 {
25154 /* Vectors which aren't in packed structures will not be less aligned than
25155 the natural alignment of their element type, so this is safe. */
25156 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
25157 return !is_packed;
25158
25159 return default_builtin_vector_alignment_reachable (type, is_packed);
25160 }
25161
25162 static bool
25163 arm_builtin_support_vector_misalignment (enum machine_mode mode,
25164 const_tree type, int misalignment,
25165 bool is_packed)
25166 {
25167 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
25168 {
25169 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
25170
25171 if (is_packed)
25172 return align == 1;
25173
25174 /* If the misalignment is unknown, we should be able to handle the access
25175 so long as it is not to a member of a packed data structure. */
25176 if (misalignment == -1)
25177 return true;
25178
25179 /* Return true if the misalignment is a multiple of the natural alignment
25180 of the vector's element type. This is probably always going to be
25181 true in practice, since we've already established that this isn't a
25182 packed access. */
25183 return ((misalignment % align) == 0);
25184 }
25185
25186 return default_builtin_support_vector_misalignment (mode, type, misalignment,
25187 is_packed);
25188 }
25189
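/* Adjust the fixed and call-used register sets to match the selected
   core, FPU and ABI options: Thumb-1 high registers and LR, the VFP
   and iWMMXt banks, the PIC register and the APCS frame pointer.  */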
25190 static void
25191 arm_conditional_register_usage (void)
25192 {
25193 int regno;
25194
25195 if (TARGET_THUMB1 && optimize_size)
25196 {
25197 /* When optimizing for size on Thumb-1, it's better not
25198 to use the HI regs, because of the overhead of
25199 stacking them. */
25200 for (regno = FIRST_HI_REGNUM;
25201 regno <= LAST_HI_REGNUM; ++regno)
25202 fixed_regs[regno] = call_used_regs[regno] = 1;
25203 }
25204
25205 /* The link register can be clobbered by any branch insn,
25206 but we have no way to track that at present, so mark
25207 it as unavailable. */
25208 if (TARGET_THUMB1)
25209 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
25210
25211 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
25212 {
25213 /* VFPv3 registers are disabled when earlier VFP
25214 versions are selected due to the definition of
25215 LAST_VFP_REGNUM. */
25216 for (regno = FIRST_VFP_REGNUM;
25217 regno <= LAST_VFP_REGNUM; ++ regno)
25218 {
25219 fixed_regs[regno] = 0;
25220 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
25221 || regno >= FIRST_VFP_REGNUM + 32;
25222 }
25223 }
25224
25225 if (TARGET_REALLY_IWMMXT)
25226 {
25227 regno = FIRST_IWMMXT_GR_REGNUM;
25228 /* The 2002/10/09 revision of the XScale ABI has wCG0
25229 and wCG1 as call-preserved registers. The 2002/11/21
25230 revision changed this so that all wCG registers are
25231 scratch registers. */
25232 for (regno = FIRST_IWMMXT_GR_REGNUM;
25233 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
25234 fixed_regs[regno] = 0;
25235 /* The XScale ABI has wR0 - wR9 as scratch registers,
25236 the rest as call-preserved registers. */
25237 for (regno = FIRST_IWMMXT_REGNUM;
25238 regno <= LAST_IWMMXT_REGNUM; ++ regno)
25239 {
25240 fixed_regs[regno] = 0;
25241 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
25242 }
25243 }
25244
25245 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
25246 {
25247 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
25248 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
25249 }
25250 else if (TARGET_APCS_STACK)
25251 {
25252 fixed_regs[10] = 1;
25253 call_used_regs[10] = 1;
25254 }
25255 /* -mcaller-super-interworking reserves r11 for calls to
25256 _interwork_r11_call_via_rN(). Making the register global
25257 is an easy way of ensuring that it remains valid for all
25258 calls. */
25259 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
25260 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
25261 {
25262 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25263 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25264 if (TARGET_CALLER_INTERWORKING)
25265 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25266 }
25267 SUBTARGET_CONDITIONAL_REGISTER_USAGE
25268 }
25269
25270 static reg_class_t
25271 arm_preferred_rename_class (reg_class_t rclass)
25272 {
25273 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
25274 using GENERAL_REGS. During the register rename pass we therefore prefer
25275 LO_REGS, which can reduce code size. */
25276 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
25277 return LO_REGS;
25278 else
25279 return NO_REGS;
25280 }
25281
25282 /* Compute the attribute "length" of insn "*push_multi".
25283 So this function MUST be kept in sync with that insn pattern. */
25284 int
25285 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
25286 {
25287 int i, regno, hi_reg;
25288 int num_saves = XVECLEN (parallel_op, 0);
25289
25290 /* ARM mode. */
25291 if (TARGET_ARM)
25292 return 4;
25293 /* Thumb1 mode. */
25294 if (TARGET_THUMB1)
25295 return 2;
25296
25297 /* Thumb2 mode. */
25298 regno = REGNO (first_op);
25299 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
25300 for (i = 1; i < num_saves && !hi_reg; i++)
25301 {
25302 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
25303 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
25304 }
25305
25306 if (!hi_reg)
25307 return 2;
25308 return 4;
25309 }
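
/* Worked example for the Thumb-2 case above: "push {r4-r7, lr}" contains
   no high register other than LR and so fits the 16-bit encoding
   (length 2), whereas "push {r4, r8}" includes the high register r8 and
   needs the 32-bit encoding (length 4).  The register choices here are
   illustrative only.  */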
25310
25311 /* Compute the number of instructions emitted by output_move_double. */
25312 int
25313 arm_count_output_move_double_insns (rtx *operands)
25314 {
25315 int count;
25316 rtx ops[2];
25317 /* output_move_double may modify the operands array, so call it
25318 here on a copy of the array. */
25319 ops[0] = operands[0];
25320 ops[1] = operands[1];
25321 output_move_double (ops, false, &count);
25322 return count;
25323 }
25324
25325 int
25326 vfp3_const_double_for_fract_bits (rtx operand)
25327 {
25328 REAL_VALUE_TYPE r0;
25329
25330 if (!CONST_DOUBLE_P (operand))
25331 return 0;
25332
25333 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
25334 if (exact_real_inverse (DFmode, &r0))
25335 {
25336 if (exact_real_truncate (DFmode, &r0))
25337 {
25338 HOST_WIDE_INT value = real_to_integer (&r0);
25339 value = value & 0xffffffff;
25340 if ((value != 0) && ( (value & (value - 1)) == 0))
25341 return int_log2 (value);
25342 }
25343 }
25344 return 0;
25345 }
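
/* Worked example: for a CONST_DOUBLE of 2^-16 (1.0/65536.0) the exact
   inverse is 65536.0, which truncates exactly to a power of two, so the
   function returns 16.  Patterns elsewhere can then use this value as
   the number of fraction bits of a VCVT fixed-point conversion; the
   constant chosen here is illustrative only.  */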
25346 \f
25347 /* Emit a memory barrier around an atomic sequence according to MODEL. */
25348
25349 static void
25350 arm_pre_atomic_barrier (enum memmodel model)
25351 {
25352 if (need_atomic_barrier_p (model, true))
25353 emit_insn (gen_memory_barrier ());
25354 }
25355
25356 static void
25357 arm_post_atomic_barrier (enum memmodel model)
25358 {
25359 if (need_atomic_barrier_p (model, false))
25360 emit_insn (gen_memory_barrier ());
25361 }
25362
25363 /* Emit the load-exclusive and store-exclusive instructions. */
25364
25365 static void
25366 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem)
25367 {
25368 rtx (*gen) (rtx, rtx);
25369
25370 switch (mode)
25371 {
25372 case QImode: gen = gen_arm_load_exclusiveqi; break;
25373 case HImode: gen = gen_arm_load_exclusivehi; break;
25374 case SImode: gen = gen_arm_load_exclusivesi; break;
25375 case DImode: gen = gen_arm_load_exclusivedi; break;
25376 default:
25377 gcc_unreachable ();
25378 }
25379
25380 emit_insn (gen (rval, mem));
25381 }
25382
25383 static void
25384 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem)
25385 {
25386 rtx (*gen) (rtx, rtx, rtx);
25387
25388 switch (mode)
25389 {
25390 case QImode: gen = gen_arm_store_exclusiveqi; break;
25391 case HImode: gen = gen_arm_store_exclusivehi; break;
25392 case SImode: gen = gen_arm_store_exclusivesi; break;
25393 case DImode: gen = gen_arm_store_exclusivedi; break;
25394 default:
25395 gcc_unreachable ();
25396 }
25397
25398 emit_insn (gen (bval, rval, mem));
25399 }
25400
25401 /* Mark the previous jump instruction as unlikely. */
25402
25403 static void
25404 emit_unlikely_jump (rtx insn)
25405 {
25406 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
25407
25408 insn = emit_jump_insn (insn);
25409 add_reg_note (insn, REG_BR_PROB, very_unlikely);
25410 }
25411
25412 /* Expand a compare and swap pattern. */
25413
25414 void
25415 arm_expand_compare_and_swap (rtx operands[])
25416 {
25417 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
25418 enum machine_mode mode;
25419 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
25420
25421 bval = operands[0];
25422 rval = operands[1];
25423 mem = operands[2];
25424 oldval = operands[3];
25425 newval = operands[4];
25426 is_weak = operands[5];
25427 mod_s = operands[6];
25428 mod_f = operands[7];
25429 mode = GET_MODE (mem);
25430
25431 switch (mode)
25432 {
25433 case QImode:
25434 case HImode:
25435 /* For narrow modes, we're going to perform the comparison in SImode,
25436 so do the zero-extension now. */
25437 rval = gen_reg_rtx (SImode);
25438 oldval = convert_modes (SImode, mode, oldval, true);
25439 /* FALLTHRU */
25440
25441 case SImode:
25442 /* Force the value into a register if needed. We waited until after
25443 the zero-extension above to do this properly. */
25444 if (!arm_add_operand (oldval, mode))
25445 oldval = force_reg (mode, oldval);
25446 break;
25447
25448 case DImode:
25449 if (!cmpdi_operand (oldval, mode))
25450 oldval = force_reg (mode, oldval);
25451 break;
25452
25453 default:
25454 gcc_unreachable ();
25455 }
25456
25457 switch (mode)
25458 {
25459 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
25460 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
25461 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
25462 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
25463 default:
25464 gcc_unreachable ();
25465 }
25466
25467 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
25468
25469 if (mode == QImode || mode == HImode)
25470 emit_move_insn (operands[1], gen_lowpart (mode, rval));
25471
25472 /* In all cases, we arrange for success to be signaled by Z set.
25473 This arrangement allows for the boolean result to be used directly
25474 in a subsequent branch, post optimization. */
25475 x = gen_rtx_REG (CCmode, CC_REGNUM);
25476 x = gen_rtx_EQ (SImode, x, const0_rtx);
25477 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
25478 }
25479
25480 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
25481 another memory store between the load-exclusive and store-exclusive can
25482 reset the monitor from Exclusive to Open state. This means we must wait
25483 until after reload to split the pattern, lest we get a register spill in
25484 the middle of the atomic sequence. */
25485
25486 void
25487 arm_split_compare_and_swap (rtx operands[])
25488 {
25489 rtx rval, mem, oldval, newval, scratch;
25490 enum machine_mode mode;
25491 enum memmodel mod_s, mod_f;
25492 bool is_weak;
25493 rtx label1, label2, x, cond;
25494
25495 rval = operands[0];
25496 mem = operands[1];
25497 oldval = operands[2];
25498 newval = operands[3];
25499 is_weak = (operands[4] != const0_rtx);
25500 mod_s = (enum memmodel) INTVAL (operands[5]);
25501 mod_f = (enum memmodel) INTVAL (operands[6]);
25502 scratch = operands[7];
25503 mode = GET_MODE (mem);
25504
25505 arm_pre_atomic_barrier (mod_s);
25506
25507 label1 = NULL_RTX;
25508 if (!is_weak)
25509 {
25510 label1 = gen_label_rtx ();
25511 emit_label (label1);
25512 }
25513 label2 = gen_label_rtx ();
25514
25515 arm_emit_load_exclusive (mode, rval, mem);
25516
25517 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
25518 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25519 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
25520 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
25521 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
25522
25523 arm_emit_store_exclusive (mode, scratch, mem, newval);
25524
25525 /* Weak or strong, we want EQ to be true for success, so that we
25526 match the flags that we got from the compare above. */
25527 cond = gen_rtx_REG (CCmode, CC_REGNUM);
25528 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
25529 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
25530
25531 if (!is_weak)
25532 {
25533 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25534 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
25535 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
25536 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
25537 }
25538
25539 if (mod_f != MEMMODEL_RELAXED)
25540 emit_label (label2);
25541
25542 arm_post_atomic_barrier (mod_s);
25543
25544 if (mod_f == MEMMODEL_RELAXED)
25545 emit_label (label2);
25546 }
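
/* A rough sketch of the code the split above produces for a strong
   SImode compare-and-swap (barriers implied by the memory model are
   omitted and register names are illustrative only):

     1: ldrex   rval, [mem]
        cmp     rval, oldval
        bne     2f
        strex   scratch, newval, [mem]
        cmp     scratch, #0
        bne     1b
     2:

   In both the weak and strong forms, success leaves the Z flag set, as
   arranged by arm_expand_compare_and_swap.  */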
25547
25548 void
25549 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
25550 rtx value, rtx model_rtx, rtx cond)
25551 {
25552 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
25553 enum machine_mode mode = GET_MODE (mem);
25554 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
25555 rtx label, x;
25556
25557 arm_pre_atomic_barrier (model);
25558
25559 label = gen_label_rtx ();
25560 emit_label (label);
25561
25562 if (new_out)
25563 new_out = gen_lowpart (wmode, new_out);
25564 if (old_out)
25565 old_out = gen_lowpart (wmode, old_out);
25566 else
25567 old_out = new_out;
25568 value = simplify_gen_subreg (wmode, value, mode, 0);
25569
25570 arm_emit_load_exclusive (mode, old_out, mem);
25571
25572 switch (code)
25573 {
25574 case SET:
25575 new_out = value;
25576 break;
25577
25578 case NOT:
25579 x = gen_rtx_AND (wmode, old_out, value);
25580 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25581 x = gen_rtx_NOT (wmode, new_out);
25582 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25583 break;
25584
25585 case MINUS:
25586 if (CONST_INT_P (value))
25587 {
25588 value = GEN_INT (-INTVAL (value));
25589 code = PLUS;
25590 }
25591 /* FALLTHRU */
25592
25593 case PLUS:
25594 if (mode == DImode)
25595 {
25596 /* DImode plus/minus need to clobber flags. */
25597 /* The adddi3 and subdi3 patterns are incorrectly written so that
25598 they require matching operands, even when we could easily support
25599 three operands. Thankfully, this can be fixed up post-splitting,
25600 as the individual add+adc patterns do accept three operands and
25601 post-reload cprop can make these moves go away. */
25602 emit_move_insn (new_out, old_out);
25603 if (code == PLUS)
25604 x = gen_adddi3 (new_out, new_out, value);
25605 else
25606 x = gen_subdi3 (new_out, new_out, value);
25607 emit_insn (x);
25608 break;
25609 }
25610 /* FALLTHRU */
25611
25612 default:
25613 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
25614 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25615 break;
25616 }
25617
25618 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out));
25619
25620 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25621 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
25622
25623 arm_post_atomic_barrier (model);
25624 }
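
/* A rough sketch of the loop emitted above for an SImode atomic add
   (barriers implied by MODEL omitted, register names illustrative):

     1: ldrex   old, [mem]
        add     new, old, value
        strex   tmp, new, [mem]
        cmp     tmp, #0
        bne     1b  */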
25625 \f
25626 #define MAX_VECT_LEN 16
25627
25628 struct expand_vec_perm_d
25629 {
25630 rtx target, op0, op1;
25631 unsigned char perm[MAX_VECT_LEN];
25632 enum machine_mode vmode;
25633 unsigned char nelt;
25634 bool one_vector_p;
25635 bool testing_p;
25636 };
25637
25638 /* Generate a variable permutation. */
25639
25640 static void
25641 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
25642 {
25643 enum machine_mode vmode = GET_MODE (target);
25644 bool one_vector_p = rtx_equal_p (op0, op1);
25645
25646 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
25647 gcc_checking_assert (GET_MODE (op0) == vmode);
25648 gcc_checking_assert (GET_MODE (op1) == vmode);
25649 gcc_checking_assert (GET_MODE (sel) == vmode);
25650 gcc_checking_assert (TARGET_NEON);
25651
25652 if (one_vector_p)
25653 {
25654 if (vmode == V8QImode)
25655 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
25656 else
25657 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
25658 }
25659 else
25660 {
25661 rtx pair;
25662
25663 if (vmode == V8QImode)
25664 {
25665 pair = gen_reg_rtx (V16QImode);
25666 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
25667 pair = gen_lowpart (TImode, pair);
25668 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
25669 }
25670 else
25671 {
25672 pair = gen_reg_rtx (OImode);
25673 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
25674 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
25675 }
25676 }
25677 }
25678
25679 void
25680 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
25681 {
25682 enum machine_mode vmode = GET_MODE (target);
25683 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
25684 bool one_vector_p = rtx_equal_p (op0, op1);
25685 rtx rmask[MAX_VECT_LEN], mask;
25686
25687 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
25688 numbering of elements for big-endian, we must reverse the order. */
25689 gcc_checking_assert (!BYTES_BIG_ENDIAN);
25690
25691 /* The VTBL instruction does not use a modulo index, so we must take care
25692 of that ourselves. */
25693 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
25694 for (i = 0; i < nelt; ++i)
25695 rmask[i] = mask;
25696 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
25697 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
25698
25699 arm_expand_vec_perm_1 (target, op0, op1, sel);
25700 }
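
/* Worked example of the masking above: for a two-operand V8QImode
   permutation nelt is 8, so SEL is ANDed with a vector of 15s.  Without
   this, an out-of-range index would make VTBL write zero instead of
   wrapping modulo 16 as VEC_PERM_EXPR requires.  */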
25701
25702 /* Generate or test for an insn that supports a constant permutation. */
25703
25704 /* Recognize patterns for the VUZP insns. */
25705
25706 static bool
25707 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
25708 {
25709 unsigned int i, odd, mask, nelt = d->nelt;
25710 rtx out0, out1, in0, in1, x;
25711 rtx (*gen)(rtx, rtx, rtx, rtx);
25712
25713 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25714 return false;
25715
25716 /* Note that these are little-endian tests. Adjust for big-endian later. */
25717 if (d->perm[0] == 0)
25718 odd = 0;
25719 else if (d->perm[0] == 1)
25720 odd = 1;
25721 else
25722 return false;
25723 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25724
25725 for (i = 0; i < nelt; i++)
25726 {
25727 unsigned elt = (i * 2 + odd) & mask;
25728 if (d->perm[i] != elt)
25729 return false;
25730 }
25731
25732 /* Success! */
25733 if (d->testing_p)
25734 return true;
25735
25736 switch (d->vmode)
25737 {
25738 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
25739 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
25740 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
25741 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
25742 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
25743 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
25744 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
25745 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
25746 default:
25747 gcc_unreachable ();
25748 }
25749
25750 in0 = d->op0;
25751 in1 = d->op1;
25752 if (BYTES_BIG_ENDIAN)
25753 {
25754 x = in0, in0 = in1, in1 = x;
25755 odd = !odd;
25756 }
25757
25758 out0 = d->target;
25759 out1 = gen_reg_rtx (d->vmode);
25760 if (odd)
25761 x = out0, out0 = out1, out1 = x;
25762
25763 emit_insn (gen (out0, in0, in1, out1));
25764 return true;
25765 }
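
/* Worked example: for a two-operand V4SImode permutation, the selector
   {0, 2, 4, 6} (even elements) or {1, 3, 5, 7} (odd elements) matches
   here and is emitted as a single VUZP, with the unwanted half of the
   result going to a scratch register.  */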
25766
25767 /* Recognize patterns for the VZIP insns. */
25768
25769 static bool
25770 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
25771 {
25772 unsigned int i, high, mask, nelt = d->nelt;
25773 rtx out0, out1, in0, in1, x;
25774 rtx (*gen)(rtx, rtx, rtx, rtx);
25775
25776 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25777 return false;
25778
25779 /* Note that these are little-endian tests. Adjust for big-endian later. */
25780 high = nelt / 2;
25781 if (d->perm[0] == high)
25782 ;
25783 else if (d->perm[0] == 0)
25784 high = 0;
25785 else
25786 return false;
25787 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25788
25789 for (i = 0; i < nelt / 2; i++)
25790 {
25791 unsigned elt = (i + high) & mask;
25792 if (d->perm[i * 2] != elt)
25793 return false;
25794 elt = (elt + nelt) & mask;
25795 if (d->perm[i * 2 + 1] != elt)
25796 return false;
25797 }
25798
25799 /* Success! */
25800 if (d->testing_p)
25801 return true;
25802
25803 switch (d->vmode)
25804 {
25805 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
25806 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
25807 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
25808 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
25809 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
25810 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
25811 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
25812 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
25813 default:
25814 gcc_unreachable ();
25815 }
25816
25817 in0 = d->op0;
25818 in1 = d->op1;
25819 if (BYTES_BIG_ENDIAN)
25820 {
25821 x = in0, in0 = in1, in1 = x;
25822 high = !high;
25823 }
25824
25825 out0 = d->target;
25826 out1 = gen_reg_rtx (d->vmode);
25827 if (high)
25828 x = out0, out0 = out1, out1 = x;
25829
25830 emit_insn (gen (out0, in0, in1, out1));
25831 return true;
25832 }
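
/* Worked example: for a two-operand V4SImode permutation, the selector
   {0, 4, 1, 5} (interleave the low halves) or {2, 6, 3, 7} (interleave
   the high halves) matches here and is emitted as a single VZIP.  */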
25833
25834 /* Recognize patterns for the VREV insns. */
25835
25836 static bool
25837 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
25838 {
25839 unsigned int i, j, diff, nelt = d->nelt;
25840 rtx (*gen)(rtx, rtx, rtx);
25841
25842 if (!d->one_vector_p)
25843 return false;
25844
25845 diff = d->perm[0];
25846 switch (diff)
25847 {
25848 case 7:
25849 switch (d->vmode)
25850 {
25851 case V16QImode: gen = gen_neon_vrev64v16qi; break;
25852 case V8QImode: gen = gen_neon_vrev64v8qi; break;
25853 default:
25854 return false;
25855 }
25856 break;
25857 case 3:
25858 switch (d->vmode)
25859 {
25860 case V16QImode: gen = gen_neon_vrev32v16qi; break;
25861 case V8QImode: gen = gen_neon_vrev32v8qi; break;
25862 case V8HImode: gen = gen_neon_vrev64v8hi; break;
25863 case V4HImode: gen = gen_neon_vrev64v4hi; break;
25864 default:
25865 return false;
25866 }
25867 break;
25868 case 1:
25869 switch (d->vmode)
25870 {
25871 case V16QImode: gen = gen_neon_vrev16v16qi; break;
25872 case V8QImode: gen = gen_neon_vrev16v8qi; break;
25873 case V8HImode: gen = gen_neon_vrev32v8hi; break;
25874 case V4HImode: gen = gen_neon_vrev32v4hi; break;
25875 case V4SImode: gen = gen_neon_vrev64v4si; break;
25876 case V2SImode: gen = gen_neon_vrev64v2si; break;
25877 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
25878 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
25879 default:
25880 return false;
25881 }
25882 break;
25883 default:
25884 return false;
25885 }
25886
25887 for (i = 0; i < nelt ; i += diff + 1)
25888 for (j = 0; j <= diff; j += 1)
25889 {
25890 /* This is guaranteed to be true as the value of diff
25891 is 7, 3, 1 and we should have enough elements in the
25892 queue to generate this. Getting a vector mask with a
25893 value of diff other than these values implies that
25894 something is wrong by the time we get here. */
25895 gcc_assert (i + j < nelt);
25896 if (d->perm[i + j] != i + diff - j)
25897 return false;
25898 }
25899
25900 /* Success! */
25901 if (d->testing_p)
25902 return true;
25903
25904 /* ??? The third operand is an artifact of the builtin infrastructure
25905 and is ignored by the actual instruction. */
25906 emit_insn (gen (d->target, d->op0, const0_rtx));
25907 return true;
25908 }
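
/* Worked example: for a one-operand V8QImode permutation the selector
   {3, 2, 1, 0, 7, 6, 5, 4} has diff == 3, i.e. it reverses the bytes
   within each 32-bit group, and is emitted as VREV32.8.  */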
25909
25910 /* Recognize patterns for the VTRN insns. */
25911
25912 static bool
25913 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
25914 {
25915 unsigned int i, odd, mask, nelt = d->nelt;
25916 rtx out0, out1, in0, in1, x;
25917 rtx (*gen)(rtx, rtx, rtx, rtx);
25918
25919 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25920 return false;
25921
25922 /* Note that these are little-endian tests. Adjust for big-endian later. */
25923 if (d->perm[0] == 0)
25924 odd = 0;
25925 else if (d->perm[0] == 1)
25926 odd = 1;
25927 else
25928 return false;
25929 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25930
25931 for (i = 0; i < nelt; i += 2)
25932 {
25933 if (d->perm[i] != i + odd)
25934 return false;
25935 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
25936 return false;
25937 }
25938
25939 /* Success! */
25940 if (d->testing_p)
25941 return true;
25942
25943 switch (d->vmode)
25944 {
25945 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
25946 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
25947 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
25948 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
25949 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
25950 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
25951 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
25952 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
25953 default:
25954 gcc_unreachable ();
25955 }
25956
25957 in0 = d->op0;
25958 in1 = d->op1;
25959 if (BYTES_BIG_ENDIAN)
25960 {
25961 x = in0, in0 = in1, in1 = x;
25962 odd = !odd;
25963 }
25964
25965 out0 = d->target;
25966 out1 = gen_reg_rtx (d->vmode);
25967 if (odd)
25968 x = out0, out0 = out1, out1 = x;
25969
25970 emit_insn (gen (out0, in0, in1, out1));
25971 return true;
25972 }
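
/* Worked example: for a two-operand V4SImode permutation, the selector
   {0, 4, 2, 6} (odd == 0) or {1, 5, 3, 7} (odd == 1) matches here and
   is emitted as a single VTRN, again with the unused half of the result
   going to a scratch register.  */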
25973
25974 /* Recognize patterns for the VEXT insns. */
25975
25976 static bool
25977 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
25978 {
25979 unsigned int i, nelt = d->nelt;
25980 rtx (*gen) (rtx, rtx, rtx, rtx);
25981 rtx offset;
25982
25983 unsigned int location;
25984
25985 unsigned int next = d->perm[0] + 1;
25986
25987 /* TODO: Handle GCC's numbering of elements for big-endian. */
25988 if (BYTES_BIG_ENDIAN)
25989 return false;
25990
25991 /* Check if the extracted indexes are increasing by one. */
25992 for (i = 1; i < nelt; next++, i++)
25993 {
25994 /* If we hit the most significant element of the 2nd vector in
25995 the previous iteration, no need to test further. */
25996 if (next == 2 * nelt)
25997 return false;
25998
25999 /* If we are operating on only one vector, it could be a
26000 rotation. If there are only two elements of size < 64 bits, let
26001 arm_evpc_neon_vrev catch it. */
26002 if (d->one_vector_p && (next == nelt))
26003 {
26004 if ((nelt == 2) && (d->vmode != V2DImode))
26005 return false;
26006 else
26007 next = 0;
26008 }
26009
26010 if (d->perm[i] != next)
26011 return false;
26012 }
26013
26014 location = d->perm[0];
26015
26016 switch (d->vmode)
26017 {
26018 case V16QImode: gen = gen_neon_vextv16qi; break;
26019 case V8QImode: gen = gen_neon_vextv8qi; break;
26020 case V4HImode: gen = gen_neon_vextv4hi; break;
26021 case V8HImode: gen = gen_neon_vextv8hi; break;
26022 case V2SImode: gen = gen_neon_vextv2si; break;
26023 case V4SImode: gen = gen_neon_vextv4si; break;
26024 case V2SFmode: gen = gen_neon_vextv2sf; break;
26025 case V4SFmode: gen = gen_neon_vextv4sf; break;
26026 case V2DImode: gen = gen_neon_vextv2di; break;
26027 default:
26028 return false;
26029 }
26030
26031 /* Success! */
26032 if (d->testing_p)
26033 return true;
26034
26035 offset = GEN_INT (location);
26036 emit_insn (gen (d->target, d->op0, d->op1, offset));
26037 return true;
26038 }
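
/* Worked example: for a two-operand V4SImode permutation the selector
   {1, 2, 3, 4} is a contiguous window starting at element 1 and is
   emitted as VEXT #1; the one-operand selector {3, 0, 1, 2} is a
   rotation and is emitted as VEXT #3 with both table operands equal.  */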
26039
26040 /* The NEON VTBL instruction is a fully variable permutation that's even
26041 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
26042 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
26043 can do slightly better by expanding this as a constant where we don't
26044 have to apply a mask. */
26045
26046 static bool
26047 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
26048 {
26049 rtx rperm[MAX_VECT_LEN], sel;
26050 enum machine_mode vmode = d->vmode;
26051 unsigned int i, nelt = d->nelt;
26052
26053 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
26054 numbering of elements for big-endian, we must reverse the order. */
26055 if (BYTES_BIG_ENDIAN)
26056 return false;
26057
26058 if (d->testing_p)
26059 return true;
26060
26061 /* Generic code will try constant permutation twice. Once with the
26062 original mode and again with the elements lowered to QImode.
26063 So wait and don't do the selector expansion ourselves. */
26064 if (vmode != V8QImode && vmode != V16QImode)
26065 return false;
26066
26067 for (i = 0; i < nelt; ++i)
26068 rperm[i] = GEN_INT (d->perm[i]);
26069 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
26070 sel = force_reg (vmode, sel);
26071
26072 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
26073 return true;
26074 }
26075
26076 static bool
26077 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
26078 {
26079 /* Check if the input mask matches vext before reordering the
26080 operands. */
26081 if (TARGET_NEON)
26082 if (arm_evpc_neon_vext (d))
26083 return true;
26084
26085 /* The pattern matching functions above are written to look for a small
26086 number to begin the sequence (0, 1, N/2). If we begin with an index
26087 from the second operand, we can swap the operands. */
26088 if (d->perm[0] >= d->nelt)
26089 {
26090 unsigned i, nelt = d->nelt;
26091 rtx x;
26092
26093 for (i = 0; i < nelt; ++i)
26094 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
26095
26096 x = d->op0;
26097 d->op0 = d->op1;
26098 d->op1 = x;
26099 }
26100
26101 if (TARGET_NEON)
26102 {
26103 if (arm_evpc_neon_vuzp (d))
26104 return true;
26105 if (arm_evpc_neon_vzip (d))
26106 return true;
26107 if (arm_evpc_neon_vrev (d))
26108 return true;
26109 if (arm_evpc_neon_vtrn (d))
26110 return true;
26111 return arm_evpc_neon_vtbl (d);
26112 }
26113 return false;
26114 }
26115
26116 /* Expand a vec_perm_const pattern. */
26117
26118 bool
26119 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
26120 {
26121 struct expand_vec_perm_d d;
26122 int i, nelt, which;
26123
26124 d.target = target;
26125 d.op0 = op0;
26126 d.op1 = op1;
26127
26128 d.vmode = GET_MODE (target);
26129 gcc_assert (VECTOR_MODE_P (d.vmode));
26130 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
26131 d.testing_p = false;
26132
26133 for (i = which = 0; i < nelt; ++i)
26134 {
26135 rtx e = XVECEXP (sel, 0, i);
26136 int ei = INTVAL (e) & (2 * nelt - 1);
26137 which |= (ei < nelt ? 1 : 2);
26138 d.perm[i] = ei;
26139 }
26140
26141 switch (which)
26142 {
26143 default:
26144 gcc_unreachable();
26145
26146 case 3:
26147 d.one_vector_p = false;
26148 if (!rtx_equal_p (op0, op1))
26149 break;
26150
26151 /* The elements of PERM do not suggest that only the first operand
26152 is used, but both operands are identical. Allow easier matching
26153 of the permutation by folding the permutation into the single
26154 input vector. */
26155 /* FALLTHRU */
26156 case 2:
26157 for (i = 0; i < nelt; ++i)
26158 d.perm[i] &= nelt - 1;
26159 d.op0 = op1;
26160 d.one_vector_p = true;
26161 break;
26162
26163 case 1:
26164 d.op1 = op0;
26165 d.one_vector_p = true;
26166 break;
26167 }
26168
26169 return arm_expand_vec_perm_const_1 (&d);
26170 }
26171
26172 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
26173
26174 static bool
26175 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
26176 const unsigned char *sel)
26177 {
26178 struct expand_vec_perm_d d;
26179 unsigned int i, nelt, which;
26180 bool ret;
26181
26182 d.vmode = vmode;
26183 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
26184 d.testing_p = true;
26185 memcpy (d.perm, sel, nelt);
26186
26187 /* Categorize the set of elements in the selector. */
26188 for (i = which = 0; i < nelt; ++i)
26189 {
26190 unsigned char e = d.perm[i];
26191 gcc_assert (e < 2 * nelt);
26192 which |= (e < nelt ? 1 : 2);
26193 }
26194
26195 /* For all elements from second vector, fold the elements to first. */
26196 if (which == 2)
26197 for (i = 0; i < nelt; ++i)
26198 d.perm[i] -= nelt;
26199
26200 /* Check whether the mask can be applied to the vector type. */
26201 d.one_vector_p = (which != 3);
26202
26203 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
26204 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
26205 if (!d.one_vector_p)
26206 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
26207
26208 start_sequence ();
26209 ret = arm_expand_vec_perm_const_1 (&d);
26210 end_sequence ();
26211
26212 return ret;
26213 }
26214
26215 bool
26216 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
26217 {
26218 /* On soft-float targets, all auto-increment forms are OK as long as
26219 LDRD is available or the access is no wider than a word. */
26220 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
26221 return true;
26222
26223 switch (code)
26224 {
26225 /* Post increment and Pre Decrement are supported for all
26226 instruction forms except for vector forms. */
26227 case ARM_POST_INC:
26228 case ARM_PRE_DEC:
26229 if (VECTOR_MODE_P (mode))
26230 {
26231 if (code != ARM_PRE_DEC)
26232 return true;
26233 else
26234 return false;
26235 }
26236
26237 return true;
26238
26239 case ARM_POST_DEC:
26240 case ARM_PRE_INC:
26241 /* Without LDRD, there is no point in auto-incrementing for
26242 modes wider than a word, because ldm and stm do not have
26243 these forms. */
26244 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
26245 return false;
26246
26247 /* Vector and floating point modes do not support
26248 these auto increment forms. */
26249 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
26250 return false;
26251
26252 return true;
26253
26254 default:
26255 return false;
26256
26257 }
26258
26259 return false;
26260 }
26261
26262 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
26263 on ARM, since we know that shifts by negative amounts are no-ops.
26264 Additionally, the default expansion code is not available or suitable
26265 for post-reload insn splits (this can occur when the register allocator
26266 chooses not to do a shift in NEON).
26267
26268 This function is used in both initial expand and post-reload splits, and
26269 handles all kinds of 64-bit shifts.
26270
26271 Input requirements:
26272 - It is safe for the input and output to be the same register, but
26273 early-clobber rules apply for the shift amount and scratch registers.
26274 - Shift by register requires both scratch registers. Shift by a constant
26275 less than 32 in Thumb2 mode requires SCRATCH1 only. In all other cases
26276 the scratch registers may be NULL.
26277 - Ashiftrt by a register also clobbers the CC register. */
26278 void
26279 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
26280 rtx amount, rtx scratch1, rtx scratch2)
26281 {
26282 rtx out_high = gen_highpart (SImode, out);
26283 rtx out_low = gen_lowpart (SImode, out);
26284 rtx in_high = gen_highpart (SImode, in);
26285 rtx in_low = gen_lowpart (SImode, in);
26286
26287 /* Terminology:
26288 in = the register pair containing the input value.
26289 out = the destination register pair.
26290 up = the high- or low-part of each pair.
26291 down = the opposite part to "up".
26292 In a shift, we can consider bits to shift from "up"-stream to
26293 "down"-stream, so in a left-shift "up" is the low-part and "down"
26294 is the high-part of each register pair. */
26295
26296 rtx out_up = code == ASHIFT ? out_low : out_high;
26297 rtx out_down = code == ASHIFT ? out_high : out_low;
26298 rtx in_up = code == ASHIFT ? in_low : in_high;
26299 rtx in_down = code == ASHIFT ? in_high : in_low;
26300
26301 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
26302 gcc_assert (out
26303 && (REG_P (out) || GET_CODE (out) == SUBREG)
26304 && GET_MODE (out) == DImode);
26305 gcc_assert (in
26306 && (REG_P (in) || GET_CODE (in) == SUBREG)
26307 && GET_MODE (in) == DImode);
26308 gcc_assert (amount
26309 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
26310 && GET_MODE (amount) == SImode)
26311 || CONST_INT_P (amount)));
26312 gcc_assert (scratch1 == NULL
26313 || (GET_CODE (scratch1) == SCRATCH)
26314 || (GET_MODE (scratch1) == SImode
26315 && REG_P (scratch1)));
26316 gcc_assert (scratch2 == NULL
26317 || (GET_CODE (scratch2) == SCRATCH)
26318 || (GET_MODE (scratch2) == SImode
26319 && REG_P (scratch2)));
26320 gcc_assert (!REG_P (out) || !REG_P (amount)
26321 || !HARD_REGISTER_P (out)
26322 || (REGNO (out) != REGNO (amount)
26323 && REGNO (out) + 1 != REGNO (amount)));
26324
26325 /* Macros to make following code more readable. */
26326 #define SUB_32(DEST,SRC) \
26327 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
26328 #define RSB_32(DEST,SRC) \
26329 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
26330 #define SUB_S_32(DEST,SRC) \
26331 gen_addsi3_compare0 ((DEST), (SRC), \
26332 GEN_INT (-32))
26333 #define SET(DEST,SRC) \
26334 gen_rtx_SET (SImode, (DEST), (SRC))
26335 #define SHIFT(CODE,SRC,AMOUNT) \
26336 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
26337 #define LSHIFT(CODE,SRC,AMOUNT) \
26338 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
26339 SImode, (SRC), (AMOUNT))
26340 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
26341 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
26342 SImode, (SRC), (AMOUNT))
26343 #define ORR(A,B) \
26344 gen_rtx_IOR (SImode, (A), (B))
26345 #define BRANCH(COND,LABEL) \
26346 gen_arm_cond_branch ((LABEL), \
26347 gen_rtx_ ## COND (CCmode, cc_reg, \
26348 const0_rtx), \
26349 cc_reg)
26350
26351 /* Shifts by register and shifts by constant are handled separately. */
26352 if (CONST_INT_P (amount))
26353 {
26354 /* We have a shift-by-constant. */
26355
26356 /* First, handle out-of-range shift amounts.
26357 In both cases we try to match the result that an ARM
26358 shift-by-register instruction would give. This helps reduce execution
26359 differences between optimization levels, but it won't stop other
26360 parts of the compiler doing different things. This is "undefined
26361 behaviour", in any case. */
26362 if (INTVAL (amount) <= 0)
26363 emit_insn (gen_movdi (out, in));
26364 else if (INTVAL (amount) >= 64)
26365 {
26366 if (code == ASHIFTRT)
26367 {
26368 rtx const31_rtx = GEN_INT (31);
26369 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
26370 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
26371 }
26372 else
26373 emit_insn (gen_movdi (out, const0_rtx));
26374 }
26375
26376 /* Now handle valid shifts. */
26377 else if (INTVAL (amount) < 32)
26378 {
26379 /* Shifts by a constant less than 32. */
26380 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
26381
26382 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
26383 emit_insn (SET (out_down,
26384 ORR (REV_LSHIFT (code, in_up, reverse_amount),
26385 out_down)));
26386 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
26387 }
26388 else
26389 {
26390 /* Shifts by a constant greater than 31. */
26391 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
26392
26393 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
26394 if (code == ASHIFTRT)
26395 emit_insn (gen_ashrsi3 (out_up, in_up,
26396 GEN_INT (31)));
26397 else
26398 emit_insn (SET (out_up, const0_rtx));
26399 }
26400 }
26401 else
26402 {
26403 /* We have a shift-by-register. */
26404 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
26405
26406 /* This alternative requires the scratch registers. */
26407 gcc_assert (scratch1 && REG_P (scratch1));
26408 gcc_assert (scratch2 && REG_P (scratch2));
26409
26410 /* We will need the values "amount-32" and "32-amount" later.
26411 Swapping them around now allows the later code to be more general. */
26412 switch (code)
26413 {
26414 case ASHIFT:
26415 emit_insn (SUB_32 (scratch1, amount));
26416 emit_insn (RSB_32 (scratch2, amount));
26417 break;
26418 case ASHIFTRT:
26419 emit_insn (RSB_32 (scratch1, amount));
26420 /* Also set CC = amount > 32. */
26421 emit_insn (SUB_S_32 (scratch2, amount));
26422 break;
26423 case LSHIFTRT:
26424 emit_insn (RSB_32 (scratch1, amount));
26425 emit_insn (SUB_32 (scratch2, amount));
26426 break;
26427 default:
26428 gcc_unreachable ();
26429 }
26430
26431 /* Emit code like this:
26432
26433 arithmetic-left:
26434 out_down = in_down << amount;
26435 out_down = (in_up << (amount - 32)) | out_down;
26436 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
26437 out_up = in_up << amount;
26438
26439 arithmetic-right:
26440 out_down = in_down >> amount;
26441 out_down = (in_up << (32 - amount)) | out_down;
26442 if (amount < 32)
26443 out_down = ((signed)in_up >> (amount - 32)) | out_down;
26444 out_up = in_up << amount;
26445
26446 logical-right:
26447 out_down = in_down >> amount;
26448 out_down = (in_up << (32 - amount)) | out_down;
26449 if (amount < 32)
26450 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
26451 out_up = in_up << amount;
26452
26453 The ARM and Thumb2 variants are the same but implemented slightly
26454 differently. If this were only called during expand we could just
26455 use the Thumb2 case and let combine do the right thing, but this
26456 can also be called from post-reload splitters. */
26457
26458 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
26459
26460 if (!TARGET_THUMB2)
26461 {
26462 /* Emit code for ARM mode. */
26463 emit_insn (SET (out_down,
26464 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
26465 if (code == ASHIFTRT)
26466 {
26467 rtx done_label = gen_label_rtx ();
26468 emit_jump_insn (BRANCH (LT, done_label));
26469 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
26470 out_down)));
26471 emit_label (done_label);
26472 }
26473 else
26474 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
26475 out_down)));
26476 }
26477 else
26478 {
26479 /* Emit code for Thumb2 mode.
26480 Thumb2 can't do shift and or in one insn. */
26481 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
26482 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
26483
26484 if (code == ASHIFTRT)
26485 {
26486 rtx done_label = gen_label_rtx ();
26487 emit_jump_insn (BRANCH (LT, done_label));
26488 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
26489 emit_insn (SET (out_down, ORR (out_down, scratch2)));
26490 emit_label (done_label);
26491 }
26492 else
26493 {
26494 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
26495 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
26496 }
26497 }
26498
26499 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
26500 }
26501
26502 #undef SUB_32
26503 #undef RSB_32
26504 #undef SUB_S_32
26505 #undef SET
26506 #undef SHIFT
26507 #undef LSHIFT
26508 #undef REV_LSHIFT
26509 #undef ORR
26510 #undef BRANCH
26511 }
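
/* A rough sketch of the constant-shift case above: a DImode left shift
   by 12 becomes, in ARM mode (register names illustrative only):

     lsl   out_hi, in_hi, #12
     orr   out_hi, out_hi, in_lo, lsr #20
     lsl   out_lo, in_lo, #12  */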
26512
26513
26514 /* Return true if *COMPARISON is a valid comparison operation, and
26515 force the operands into a form that is valid for it. */
26516 bool
26517 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
26518 {
26519 enum rtx_code code = GET_CODE (*comparison);
26520 enum rtx_code canonical_code;
26521 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
26522 ? GET_MODE (*op2) : GET_MODE (*op1);
26523
26524 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
26525
26526 if (code == UNEQ || code == LTGT)
26527 return false;
26528
26529 canonical_code = arm_canonicalize_comparison (code, op1, op2);
26530 PUT_CODE (*comparison, canonical_code);
26531
26532 switch (mode)
26533 {
26534 case SImode:
26535 if (!arm_add_operand (*op1, mode))
26536 *op1 = force_reg (mode, *op1);
26537 if (!arm_add_operand (*op2, mode))
26538 *op2 = force_reg (mode, *op2);
26539 return true;
26540
26541 case DImode:
26542 if (!cmpdi_operand (*op1, mode))
26543 *op1 = force_reg (mode, *op1);
26544 if (!cmpdi_operand (*op2, mode))
26545 *op2 = force_reg (mode, *op2);
26546 return true;
26547
26548 case SFmode:
26549 case DFmode:
26550 if (!arm_float_compare_operand (*op1, mode))
26551 *op1 = force_reg (mode, *op1);
26552 if (!arm_float_compare_operand (*op2, mode))
26553 *op2 = force_reg (mode, *op2);
26554 return true;
26555 default:
26556 break;
26557 }
26558
26559 return false;
26560
26561 }
26562
26563 #include "gt-arm.h"