1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
8
9 This file is part of GCC.
10
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
15
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "flags.h"
39 #include "reload.h"
40 #include "function.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "diagnostic-core.h"
44 #include "recog.h"
45 #include "cgraph.h"
46 #include "ggc.h"
47 #include "except.h"
48 #include "tm_p.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "debug.h"
52 #include "langhooks.h"
53 #include "df.h"
54 #include "intl.h"
55 #include "libfuncs.h"
56 #include "params.h"
57 #include "opts.h"
58 #include "dumpfile.h"
59
60 /* Forward definitions of types. */
61 typedef struct minipool_node Mnode;
62 typedef struct minipool_fixup Mfix;
63
64 void (*arm_lang_output_object_attributes_hook)(void);
65
66 struct four_ints
67 {
68 int i[4];
69 };
70
71 /* Forward function declarations. */
72 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
73 static int arm_compute_static_chain_stack_bytes (void);
74 static arm_stack_offsets *arm_get_frame_offsets (void);
75 static void arm_add_gc_roots (void);
76 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
77 HOST_WIDE_INT, rtx, rtx, int, int);
78 static unsigned bit_count (unsigned long);
79 static int arm_address_register_rtx_p (rtx, int);
80 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
81 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
82 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
83 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
84 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
85 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
86 inline static int thumb1_index_register_rtx_p (rtx, int);
87 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
88 static int thumb_far_jump_used_p (void);
89 static bool thumb_force_lr_save (void);
90 static unsigned arm_size_return_regs (void);
91 static bool arm_assemble_integer (rtx, unsigned int, int);
92 static void arm_print_operand (FILE *, rtx, int);
93 static void arm_print_operand_address (FILE *, rtx);
94 static bool arm_print_operand_punct_valid_p (unsigned char code);
95 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
96 static arm_cc get_arm_condition_code (rtx);
97 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
98 static rtx is_jump_table (rtx);
99 static const char *output_multi_immediate (rtx *, const char *, const char *,
100 int, HOST_WIDE_INT);
101 static const char *shift_op (rtx, HOST_WIDE_INT *);
102 static struct machine_function *arm_init_machine_status (void);
103 static void thumb_exit (FILE *, int);
104 static rtx is_jump_table (rtx);
105 static HOST_WIDE_INT get_jump_table_size (rtx);
106 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
107 static Mnode *add_minipool_forward_ref (Mfix *);
108 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
109 static Mnode *add_minipool_backward_ref (Mfix *);
110 static void assign_minipool_offsets (Mfix *);
111 static void arm_print_value (FILE *, rtx);
112 static void dump_minipool (rtx);
113 static int arm_barrier_cost (rtx);
114 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
115 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
116 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
117 rtx);
118 static void arm_reorg (void);
119 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
120 static unsigned long arm_compute_save_reg0_reg12_mask (void);
121 static unsigned long arm_compute_save_reg_mask (void);
122 static unsigned long arm_isr_value (tree);
123 static unsigned long arm_compute_func_type (void);
124 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
125 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
126 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
127 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
128 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
129 #endif
130 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
131 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
132 static int arm_comp_type_attributes (const_tree, const_tree);
133 static void arm_set_default_type_attributes (tree);
134 static int arm_adjust_cost (rtx, rtx, rtx, int);
135 static int optimal_immediate_sequence (enum rtx_code code,
136 unsigned HOST_WIDE_INT val,
137 struct four_ints *return_sequence);
138 static int optimal_immediate_sequence_1 (enum rtx_code code,
139 unsigned HOST_WIDE_INT val,
140 struct four_ints *return_sequence,
141 int i);
142 static int arm_get_strip_length (int);
143 static bool arm_function_ok_for_sibcall (tree, tree);
144 static enum machine_mode arm_promote_function_mode (const_tree,
145 enum machine_mode, int *,
146 const_tree, int);
147 static bool arm_return_in_memory (const_tree, const_tree);
148 static rtx arm_function_value (const_tree, const_tree, bool);
149 static rtx arm_libcall_value_1 (enum machine_mode);
150 static rtx arm_libcall_value (enum machine_mode, const_rtx);
151 static bool arm_function_value_regno_p (const unsigned int);
152 static void arm_internal_label (FILE *, const char *, unsigned long);
153 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
154 tree);
155 static bool arm_have_conditional_execution (void);
156 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
157 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
158 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
159 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
160 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
161 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
162 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
163 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
164 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
165 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
166 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
167 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
168 static void arm_init_builtins (void);
169 static void arm_init_iwmmxt_builtins (void);
170 static rtx safe_vector_operand (rtx, enum machine_mode);
171 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
172 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
173 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
174 static tree arm_builtin_decl (unsigned, bool);
175 static void emit_constant_insn (rtx cond, rtx pattern);
176 static rtx emit_set_insn (rtx, rtx);
177 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
178 tree, bool);
179 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
180 const_tree, bool);
181 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
182 const_tree, bool);
183 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
184 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
185 const_tree);
186 static rtx aapcs_libcall_value (enum machine_mode);
187 static int aapcs_select_return_coproc (const_tree, const_tree);
188
189 #ifdef OBJECT_FORMAT_ELF
190 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
191 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
192 #endif
193 #ifndef ARM_PE
194 static void arm_encode_section_info (tree, rtx, int);
195 #endif
196
197 static void arm_file_end (void);
198 static void arm_file_start (void);
199
200 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
201 tree, int *, int);
202 static bool arm_pass_by_reference (cumulative_args_t,
203 enum machine_mode, const_tree, bool);
204 static bool arm_promote_prototypes (const_tree);
205 static bool arm_default_short_enums (void);
206 static bool arm_align_anon_bitfield (void);
207 static bool arm_return_in_msb (const_tree);
208 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
209 static bool arm_return_in_memory (const_tree, const_tree);
210 #if ARM_UNWIND_INFO
211 static void arm_unwind_emit (FILE *, rtx);
212 static bool arm_output_ttype (rtx);
213 static void arm_asm_emit_except_personality (rtx);
214 static void arm_asm_init_sections (void);
215 #endif
216 static rtx arm_dwarf_register_span (rtx);
217
218 static tree arm_cxx_guard_type (void);
219 static bool arm_cxx_guard_mask_bit (void);
220 static tree arm_get_cookie_size (tree);
221 static bool arm_cookie_has_size (void);
222 static bool arm_cxx_cdtor_returns_this (void);
223 static bool arm_cxx_key_method_may_be_inline (void);
224 static void arm_cxx_determine_class_data_visibility (tree);
225 static bool arm_cxx_class_data_always_comdat (void);
226 static bool arm_cxx_use_aeabi_atexit (void);
227 static void arm_init_libfuncs (void);
228 static tree arm_build_builtin_va_list (void);
229 static void arm_expand_builtin_va_start (tree, rtx);
230 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
231 static void arm_option_override (void);
232 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
233 static bool arm_cannot_copy_insn_p (rtx);
234 static bool arm_tls_symbol_p (rtx x);
235 static int arm_issue_rate (void);
236 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
237 static bool arm_output_addr_const_extra (FILE *, rtx);
238 static bool arm_allocate_stack_slots_for_args (void);
239 static bool arm_warn_func_return (tree);
240 static const char *arm_invalid_parameter_type (const_tree t);
241 static const char *arm_invalid_return_type (const_tree t);
242 static tree arm_promoted_type (const_tree t);
243 static tree arm_convert_to_type (tree type, tree expr);
244 static bool arm_scalar_mode_supported_p (enum machine_mode);
245 static bool arm_frame_pointer_required (void);
246 static bool arm_can_eliminate (const int, const int);
247 static void arm_asm_trampoline_template (FILE *);
248 static void arm_trampoline_init (rtx, tree, rtx);
249 static rtx arm_trampoline_adjust_address (rtx);
250 static rtx arm_pic_static_addr (rtx orig, rtx reg);
251 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
252 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
253 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
254 static bool arm_array_mode_supported_p (enum machine_mode,
255 unsigned HOST_WIDE_INT);
256 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
257 static bool arm_class_likely_spilled_p (reg_class_t);
258 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
259 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
260 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
261 const_tree type,
262 int misalignment,
263 bool is_packed);
264 static void arm_conditional_register_usage (void);
265 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
266 static unsigned int arm_autovectorize_vector_sizes (void);
267 static int arm_default_branch_cost (bool, bool);
268 static int arm_cortex_a5_branch_cost (bool, bool);
269
270 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
271 const unsigned char *sel);
272
273 \f
274 /* Table of machine attributes. */
275 static const struct attribute_spec arm_attribute_table[] =
276 {
277 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
278 affects_type_identity } */
279 /* Function calls made to this symbol must be done indirectly, because
280 it may lie outside of the 26 bit addressing range of a normal function
281 call. */
282 { "long_call", 0, 0, false, true, true, NULL, false },
283 /* Whereas these functions are always known to reside within the 26 bit
284 addressing range. */
285 { "short_call", 0, 0, false, true, true, NULL, false },
286 /* Specify the procedure call conventions for a function. */
287 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
288 false },
289 /* Interrupt Service Routines have special prologue and epilogue requirements. */
290 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
291 false },
292 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
293 false },
294 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
295 false },
296 #ifdef ARM_PE
297 /* ARM/PE has three new attributes:
298 interfacearm - ?
299 dllexport - for exporting a function/variable that will live in a dll
300 dllimport - for importing a function/variable from a dll
301
302 Microsoft allows multiple declspecs in one __declspec, separating
303 them with spaces. We do NOT support this. Instead, use __declspec
304 multiple times.
305 */
306 { "dllimport", 0, 0, true, false, false, NULL, false },
307 { "dllexport", 0, 0, true, false, false, NULL, false },
308 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
309 false },
310 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
311 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
312 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
313 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
314 false },
315 #endif
316 { NULL, 0, 0, false, false, false, NULL, false }
317 };
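/* For illustration only, these attributes are applied in user code roughly
   as follows (hypothetical declarations, not part of this file):

     extern void far_away_function (void) __attribute__ ((long_call));
     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
     double vfp_args (double) __attribute__ ((pcs ("aapcs-vfp")));

   "long_call" forces an indirect call sequence, "interrupt"/"isr" select
   the special prologue/epilogue handling, and "pcs" chooses the procedure
   call standard for that one function.  */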
318 \f
319 /* Initialize the GCC target structure. */
320 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
321 #undef TARGET_MERGE_DECL_ATTRIBUTES
322 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
323 #endif
324
325 #undef TARGET_LEGITIMIZE_ADDRESS
326 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
327
328 #undef TARGET_ATTRIBUTE_TABLE
329 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
330
331 #undef TARGET_ASM_FILE_START
332 #define TARGET_ASM_FILE_START arm_file_start
333 #undef TARGET_ASM_FILE_END
334 #define TARGET_ASM_FILE_END arm_file_end
335
336 #undef TARGET_ASM_ALIGNED_SI_OP
337 #define TARGET_ASM_ALIGNED_SI_OP NULL
338 #undef TARGET_ASM_INTEGER
339 #define TARGET_ASM_INTEGER arm_assemble_integer
340
341 #undef TARGET_PRINT_OPERAND
342 #define TARGET_PRINT_OPERAND arm_print_operand
343 #undef TARGET_PRINT_OPERAND_ADDRESS
344 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
345 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
346 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
347
348 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
349 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
350
351 #undef TARGET_ASM_FUNCTION_PROLOGUE
352 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
353
354 #undef TARGET_ASM_FUNCTION_EPILOGUE
355 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
356
357 #undef TARGET_OPTION_OVERRIDE
358 #define TARGET_OPTION_OVERRIDE arm_option_override
359
360 #undef TARGET_COMP_TYPE_ATTRIBUTES
361 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
362
363 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
364 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
365
366 #undef TARGET_SCHED_ADJUST_COST
367 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
368
369 #undef TARGET_REGISTER_MOVE_COST
370 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
371
372 #undef TARGET_MEMORY_MOVE_COST
373 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
374
375 #undef TARGET_ENCODE_SECTION_INFO
376 #ifdef ARM_PE
377 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
378 #else
379 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
380 #endif
381
382 #undef TARGET_STRIP_NAME_ENCODING
383 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
384
385 #undef TARGET_ASM_INTERNAL_LABEL
386 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
387
388 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
389 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
390
391 #undef TARGET_FUNCTION_VALUE
392 #define TARGET_FUNCTION_VALUE arm_function_value
393
394 #undef TARGET_LIBCALL_VALUE
395 #define TARGET_LIBCALL_VALUE arm_libcall_value
396
397 #undef TARGET_FUNCTION_VALUE_REGNO_P
398 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
399
400 #undef TARGET_ASM_OUTPUT_MI_THUNK
401 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
402 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
403 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
404
405 #undef TARGET_RTX_COSTS
406 #define TARGET_RTX_COSTS arm_rtx_costs
407 #undef TARGET_ADDRESS_COST
408 #define TARGET_ADDRESS_COST arm_address_cost
409
410 #undef TARGET_SHIFT_TRUNCATION_MASK
411 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
412 #undef TARGET_VECTOR_MODE_SUPPORTED_P
413 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
414 #undef TARGET_ARRAY_MODE_SUPPORTED_P
415 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
416 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
417 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
418 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
419 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
420 arm_autovectorize_vector_sizes
421
422 #undef TARGET_MACHINE_DEPENDENT_REORG
423 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
424
425 #undef TARGET_INIT_BUILTINS
426 #define TARGET_INIT_BUILTINS arm_init_builtins
427 #undef TARGET_EXPAND_BUILTIN
428 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
429 #undef TARGET_BUILTIN_DECL
430 #define TARGET_BUILTIN_DECL arm_builtin_decl
431
432 #undef TARGET_INIT_LIBFUNCS
433 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
434
435 #undef TARGET_PROMOTE_FUNCTION_MODE
436 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
437 #undef TARGET_PROMOTE_PROTOTYPES
438 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
439 #undef TARGET_PASS_BY_REFERENCE
440 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
441 #undef TARGET_ARG_PARTIAL_BYTES
442 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
443 #undef TARGET_FUNCTION_ARG
444 #define TARGET_FUNCTION_ARG arm_function_arg
445 #undef TARGET_FUNCTION_ARG_ADVANCE
446 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
447 #undef TARGET_FUNCTION_ARG_BOUNDARY
448 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
449
450 #undef TARGET_SETUP_INCOMING_VARARGS
451 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
452
453 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
454 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
455
456 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
457 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
458 #undef TARGET_TRAMPOLINE_INIT
459 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
460 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
461 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
462
463 #undef TARGET_WARN_FUNC_RETURN
464 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
465
466 #undef TARGET_DEFAULT_SHORT_ENUMS
467 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
468
469 #undef TARGET_ALIGN_ANON_BITFIELD
470 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
471
472 #undef TARGET_NARROW_VOLATILE_BITFIELD
473 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
474
475 #undef TARGET_CXX_GUARD_TYPE
476 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
477
478 #undef TARGET_CXX_GUARD_MASK_BIT
479 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
480
481 #undef TARGET_CXX_GET_COOKIE_SIZE
482 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
483
484 #undef TARGET_CXX_COOKIE_HAS_SIZE
485 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
486
487 #undef TARGET_CXX_CDTOR_RETURNS_THIS
488 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
489
490 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
491 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
492
493 #undef TARGET_CXX_USE_AEABI_ATEXIT
494 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
495
496 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
497 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
498 arm_cxx_determine_class_data_visibility
499
500 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
501 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
502
503 #undef TARGET_RETURN_IN_MSB
504 #define TARGET_RETURN_IN_MSB arm_return_in_msb
505
506 #undef TARGET_RETURN_IN_MEMORY
507 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
508
509 #undef TARGET_MUST_PASS_IN_STACK
510 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
511
512 #if ARM_UNWIND_INFO
513 #undef TARGET_ASM_UNWIND_EMIT
514 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
515
516 /* EABI unwinding tables use a different format for the typeinfo tables. */
517 #undef TARGET_ASM_TTYPE
518 #define TARGET_ASM_TTYPE arm_output_ttype
519
520 #undef TARGET_ARM_EABI_UNWINDER
521 #define TARGET_ARM_EABI_UNWINDER true
522
523 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
524 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
525
526 #undef TARGET_ASM_INIT_SECTIONS
527 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
528 #endif /* ARM_UNWIND_INFO */
529
530 #undef TARGET_DWARF_REGISTER_SPAN
531 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
532
533 #undef TARGET_CANNOT_COPY_INSN_P
534 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
535
536 #ifdef HAVE_AS_TLS
537 #undef TARGET_HAVE_TLS
538 #define TARGET_HAVE_TLS true
539 #endif
540
541 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
542 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
543
544 #undef TARGET_LEGITIMATE_CONSTANT_P
545 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
546
547 #undef TARGET_CANNOT_FORCE_CONST_MEM
548 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
549
550 #undef TARGET_MAX_ANCHOR_OFFSET
551 #define TARGET_MAX_ANCHOR_OFFSET 4095
552
553 /* The minimum is set such that the total size of the block
554 for a particular anchor is -4088 + 1 + 4095 bytes, which is
555 divisible by eight, ensuring natural spacing of anchors. */
556 #undef TARGET_MIN_ANCHOR_OFFSET
557 #define TARGET_MIN_ANCHOR_OFFSET -4088
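/* Worked out: an anchor therefore covers offsets -4088 through +4095,
   i.e. 4088 + 1 + 4095 = 8184 bytes in total, and 8184 = 8 * 1023, so the
   block size is a multiple of eight as required above.  */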
558
559 #undef TARGET_SCHED_ISSUE_RATE
560 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
561
562 #undef TARGET_MANGLE_TYPE
563 #define TARGET_MANGLE_TYPE arm_mangle_type
564
565 #undef TARGET_BUILD_BUILTIN_VA_LIST
566 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
567 #undef TARGET_EXPAND_BUILTIN_VA_START
568 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
569 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
570 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
571
572 #ifdef HAVE_AS_TLS
573 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
574 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
575 #endif
576
577 #undef TARGET_LEGITIMATE_ADDRESS_P
578 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
579
580 #undef TARGET_PREFERRED_RELOAD_CLASS
581 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
582
583 #undef TARGET_INVALID_PARAMETER_TYPE
584 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
585
586 #undef TARGET_INVALID_RETURN_TYPE
587 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
588
589 #undef TARGET_PROMOTED_TYPE
590 #define TARGET_PROMOTED_TYPE arm_promoted_type
591
592 #undef TARGET_CONVERT_TO_TYPE
593 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
594
595 #undef TARGET_SCALAR_MODE_SUPPORTED_P
596 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
597
598 #undef TARGET_FRAME_POINTER_REQUIRED
599 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
600
601 #undef TARGET_CAN_ELIMINATE
602 #define TARGET_CAN_ELIMINATE arm_can_eliminate
603
604 #undef TARGET_CONDITIONAL_REGISTER_USAGE
605 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
606
607 #undef TARGET_CLASS_LIKELY_SPILLED_P
608 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
609
610 #undef TARGET_VECTOR_ALIGNMENT
611 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
612
613 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
614 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
615 arm_vector_alignment_reachable
616
617 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
618 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
619 arm_builtin_support_vector_misalignment
620
621 #undef TARGET_PREFERRED_RENAME_CLASS
622 #define TARGET_PREFERRED_RENAME_CLASS \
623 arm_preferred_rename_class
624
625 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
626 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
627 arm_vectorize_vec_perm_const_ok
628
629 struct gcc_target targetm = TARGET_INITIALIZER;
630 \f
631 /* Obstack for minipool constant handling. */
632 static struct obstack minipool_obstack;
633 static char * minipool_startobj;
634
635 /* The maximum number of insns skipped which
636 will be conditionalised if possible. */
637 static int max_insns_skipped = 5;
638
639 extern FILE * asm_out_file;
640
641 /* True if we are currently building a constant table. */
642 int making_const_table;
643
644 /* The processor for which instructions should be scheduled. */
645 enum processor_type arm_tune = arm_none;
646
647 /* The current tuning set. */
648 const struct tune_params *current_tune;
649
650 /* Which floating point hardware to schedule for. */
651 int arm_fpu_attr;
652
653 /* Which floating point hardware to use. */
654 const struct arm_fpu_desc *arm_fpu_desc;
655
656 /* Used for Thumb call_via trampolines. */
657 rtx thumb_call_via_label[14];
658 static int thumb_call_reg_needed;
659
660 /* Bit values used to identify processor capabilities. */
661 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
662 #define FL_ARCH3M (1 << 1) /* Extended multiply */
663 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
664 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
665 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
666 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
667 #define FL_THUMB (1 << 6) /* Thumb aware */
668 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
669 #define FL_STRONG (1 << 8) /* StrongARM */
670 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
671 #define FL_XSCALE (1 << 10) /* XScale */
672 /* spare (1 << 11) */
673 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
674 media instructions. */
675 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
676 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
677 Note: ARM6 & 7 derivatives only. */
678 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
679 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
680 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
681 profile. */
682 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
683 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
684 #define FL_NEON (1 << 20) /* Neon instructions. */
685 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
686 architecture. */
687 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
688 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
689
690 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
691 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
692
693 /* Flags that only affect tuning, not available instructions. */
694 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
695 | FL_CO_PROC)
696
697 #define FL_FOR_ARCH2 FL_NOTM
698 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
699 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
700 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
701 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
702 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
703 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
704 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
705 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
706 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
707 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
708 #define FL_FOR_ARCH6J FL_FOR_ARCH6
709 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
710 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
711 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
712 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
713 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
714 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
715 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
716 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
717 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
718 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
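/* As an example of how these accumulate: FL_FOR_ARCH4T expands to
   FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_THUMB, while
   FL_FOR_ARCH6M is FL_FOR_ARCH6 with FL_NOTM removed, reflecting that
   ARMv6-M provides only the 'M'-profile subset of the instruction set.  */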
719
720 /* The bits in this mask specify which
721 instructions we are allowed to generate. */
722 static unsigned long insn_flags = 0;
723
724 /* The bits in this mask specify which instruction scheduling options should
725 be used. */
726 static unsigned long tune_flags = 0;
727
728 /* The highest ARM architecture version supported by the
729 target. */
730 enum base_architecture arm_base_arch = BASE_ARCH_0;
731
732 /* The following are used in the arm.md file as equivalents to bits
733 in the above two flag variables. */
734
735 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
736 int arm_arch3m = 0;
737
738 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
739 int arm_arch4 = 0;
740
741 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
742 int arm_arch4t = 0;
743
744 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
745 int arm_arch5 = 0;
746
747 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
748 int arm_arch5e = 0;
749
750 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
751 int arm_arch6 = 0;
752
753 /* Nonzero if this chip supports the ARM 6K extensions. */
754 int arm_arch6k = 0;
755
756 /* Nonzero if this chip supports the ARM 7 extensions. */
757 int arm_arch7 = 0;
758
759 /* Nonzero if instructions not present in the 'M' profile can be used. */
760 int arm_arch_notm = 0;
761
762 /* Nonzero if instructions present in ARMv7E-M can be used. */
763 int arm_arch7em = 0;
764
765 /* Nonzero if this chip can benefit from load scheduling. */
766 int arm_ld_sched = 0;
767
768 /* Nonzero if this chip is a StrongARM. */
769 int arm_tune_strongarm = 0;
770
771 /* Nonzero if this chip supports Intel Wireless MMX technology. */
772 int arm_arch_iwmmxt = 0;
773
774 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
775 int arm_arch_iwmmxt2 = 0;
776
777 /* Nonzero if this chip is an XScale. */
778 int arm_arch_xscale = 0;
779
780 /* Nonzero if tuning for XScale. */
781 int arm_tune_xscale = 0;
782
783 /* Nonzero if we want to tune for stores that access the write-buffer.
784 This typically means an ARM6 or ARM7 with MMU or MPU. */
785 int arm_tune_wbuf = 0;
786
787 /* Nonzero if tuning for Cortex-A9. */
788 int arm_tune_cortex_a9 = 0;
789
790 /* Nonzero if generating Thumb instructions. */
791 int thumb_code = 0;
792
793 /* Nonzero if generating Thumb-1 instructions. */
794 int thumb1_code = 0;
795
796 /* Nonzero if we should define __THUMB_INTERWORK__ in the
797 preprocessor.
798 XXX This is a bit of a hack, it's intended to help work around
799 problems in GLD which doesn't understand that armv5t code is
800 interworking clean. */
801 int arm_cpp_interwork = 0;
802
803 /* Nonzero if chip supports Thumb 2. */
804 int arm_arch_thumb2;
805
806 /* Nonzero if chip supports integer division instruction. */
807 int arm_arch_arm_hwdiv;
808 int arm_arch_thumb_hwdiv;
809
810 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
811 we must report the mode of the memory reference from
812 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
813 enum machine_mode output_memory_reference_mode;
814
815 /* The register number to be used for the PIC offset register. */
816 unsigned arm_pic_register = INVALID_REGNUM;
817
818 /* Set to 1 after arm_reorg has started, reset at the start of
819 the next function. */
820 static int after_arm_reorg = 0;
821
822 enum arm_pcs arm_pcs_default;
823
824 /* For an explanation of these variables, see final_prescan_insn below. */
825 int arm_ccfsm_state;
826 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
827 enum arm_cond_code arm_current_cc;
828
829 rtx arm_target_insn;
830 int arm_target_label;
831 /* The number of conditionally executed insns, including the current insn. */
832 int arm_condexec_count = 0;
833 /* A bitmask specifying the patterns for the IT block.
834 Zero means do not output an IT block before this insn. */
835 int arm_condexec_mask = 0;
836 /* The number of bits used in arm_condexec_mask. */
837 int arm_condexec_masklen = 0;
838
839 /* The condition codes of the ARM, and the inverse function. */
840 static const char * const arm_condition_codes[] =
841 {
842 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
843 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
844 };
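/* The table pairs each condition with its inverse in adjacent slots
   (eq/ne, cs/cc, mi/pl, and so on), so toggling the low bit of an index
   yields the index of the inverse condition.  */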
845
846 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
847 int arm_regs_in_sequence[] =
848 {
849 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
850 };
851
852 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
853 #define streq(string1, string2) (strcmp (string1, string2) == 0)
854
855 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
856 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
857 | (1 << PIC_OFFSET_TABLE_REGNUM)))
858 \f
859 /* Initialization code. */
860
861 struct processors
862 {
863 const char *const name;
864 enum processor_type core;
865 const char *arch;
866 enum base_architecture base_arch;
867 const unsigned long flags;
868 const struct tune_params *const tune;
869 };
870
871
872 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
873 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
874 prefetch_slots, \
875 l1_size, \
876 l1_line_size
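/* These expand to the three prefetch-related initializers of the tuning
   tables below, in order: number of prefetch slots, L1 cache size and L1
   cache line size.  ARM_PREFETCH_NOT_BENEFICIAL supplies 0, -1, -1 to
   mark prefetching as not worth tuning for; the Cortex-A9 entry below
   uses ARM_PREFETCH_BENEFICIAL (4, 32, 32).  */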
877
878 const struct tune_params arm_slowmul_tune =
879 {
880 arm_slowmul_rtx_costs,
881 NULL,
882 3, /* Constant limit. */
883 5, /* Max cond insns. */
884 ARM_PREFETCH_NOT_BENEFICIAL,
885 true, /* Prefer constant pool. */
886 arm_default_branch_cost,
887 false /* Prefer LDRD/STRD. */
888 };
889
890 const struct tune_params arm_fastmul_tune =
891 {
892 arm_fastmul_rtx_costs,
893 NULL,
894 1, /* Constant limit. */
895 5, /* Max cond insns. */
896 ARM_PREFETCH_NOT_BENEFICIAL,
897 true, /* Prefer constant pool. */
898 arm_default_branch_cost,
899 false /* Prefer LDRD/STRD. */
900 };
901
902 /* StrongARM has early execution of branches, so a sequence that is worth
903 skipping is shorter. Set max_insns_skipped to a lower value. */
904
905 const struct tune_params arm_strongarm_tune =
906 {
907 arm_fastmul_rtx_costs,
908 NULL,
909 1, /* Constant limit. */
910 3, /* Max cond insns. */
911 ARM_PREFETCH_NOT_BENEFICIAL,
912 true, /* Prefer constant pool. */
913 arm_default_branch_cost,
914 false /* Prefer LDRD/STRD. */
915 };
916
917 const struct tune_params arm_xscale_tune =
918 {
919 arm_xscale_rtx_costs,
920 xscale_sched_adjust_cost,
921 2, /* Constant limit. */
922 3, /* Max cond insns. */
923 ARM_PREFETCH_NOT_BENEFICIAL,
924 true, /* Prefer constant pool. */
925 arm_default_branch_cost,
926 false /* Prefer LDRD/STRD. */
927 };
928
929 const struct tune_params arm_9e_tune =
930 {
931 arm_9e_rtx_costs,
932 NULL,
933 1, /* Constant limit. */
934 5, /* Max cond insns. */
935 ARM_PREFETCH_NOT_BENEFICIAL,
936 true, /* Prefer constant pool. */
937 arm_default_branch_cost,
938 false /* Prefer LDRD/STRD. */
939 };
940
941 const struct tune_params arm_v6t2_tune =
942 {
943 arm_9e_rtx_costs,
944 NULL,
945 1, /* Constant limit. */
946 5, /* Max cond insns. */
947 ARM_PREFETCH_NOT_BENEFICIAL,
948 false, /* Prefer constant pool. */
949 arm_default_branch_cost,
950 false /* Prefer LDRD/STRD. */
951 };
952
953 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
954 const struct tune_params arm_cortex_tune =
955 {
956 arm_9e_rtx_costs,
957 NULL,
958 1, /* Constant limit. */
959 5, /* Max cond insns. */
960 ARM_PREFETCH_NOT_BENEFICIAL,
961 false, /* Prefer constant pool. */
962 arm_default_branch_cost,
963 false /* Prefer LDRD/STRD. */
964 };
965
966 const struct tune_params arm_cortex_a15_tune =
967 {
968 arm_9e_rtx_costs,
969 NULL,
970 1, /* Constant limit. */
971 5, /* Max cond insns. */
972 ARM_PREFETCH_NOT_BENEFICIAL,
973 false, /* Prefer constant pool. */
974 arm_default_branch_cost,
975 true /* Prefer LDRD/STRD. */
976 };
977
978 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
979 less appealing. Set max_insns_skipped to a low value. */
980
981 const struct tune_params arm_cortex_a5_tune =
982 {
983 arm_9e_rtx_costs,
984 NULL,
985 1, /* Constant limit. */
986 1, /* Max cond insns. */
987 ARM_PREFETCH_NOT_BENEFICIAL,
988 false, /* Prefer constant pool. */
989 arm_cortex_a5_branch_cost,
990 false /* Prefer LDRD/STRD. */
991 };
992
993 const struct tune_params arm_cortex_a9_tune =
994 {
995 arm_9e_rtx_costs,
996 cortex_a9_sched_adjust_cost,
997 1, /* Constant limit. */
998 5, /* Max cond insns. */
999 ARM_PREFETCH_BENEFICIAL(4,32,32),
1000 false, /* Prefer constant pool. */
1001 arm_default_branch_cost,
1002 false /* Prefer LDRD/STRD. */
1003 };
1004
1005 const struct tune_params arm_fa726te_tune =
1006 {
1007 arm_9e_rtx_costs,
1008 fa726te_sched_adjust_cost,
1009 1, /* Constant limit. */
1010 5, /* Max cond insns. */
1011 ARM_PREFETCH_NOT_BENEFICIAL,
1012 true, /* Prefer constant pool. */
1013 arm_default_branch_cost,
1014 false /* Prefer LDRD/STRD. */
1015 };
1016
1017
1018 /* Not all of these give usefully different compilation alternatives,
1019 but there is no simple way of generalizing them. */
1020 static const struct processors all_cores[] =
1021 {
1022 /* ARM Cores */
1023 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
1024 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1025 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1026 #include "arm-cores.def"
1027 #undef ARM_CORE
1028 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1029 };
1030
1031 static const struct processors all_architectures[] =
1032 {
1033 /* ARM Architectures */
1034 /* We don't specify tuning costs here as it will be figured out
1035 from the core. */
1036
1037 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1038 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1039 #include "arm-arches.def"
1040 #undef ARM_ARCH
1041 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1042 };
1043
1044
1045 /* These are populated as commandline arguments are processed, or NULL
1046 if not specified. */
1047 static const struct processors *arm_selected_arch;
1048 static const struct processors *arm_selected_cpu;
1049 static const struct processors *arm_selected_tune;
1050
1051 /* The name of the preprocessor macro to define for this architecture. */
1052
1053 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1054
1055 /* Available values for -mfpu=. */
1056
1057 static const struct arm_fpu_desc all_fpus[] =
1058 {
1059 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
1060 { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
1061 #include "arm-fpus.def"
1062 #undef ARM_FPU
1063 };
1064
1065
1066 /* Supported TLS relocations. */
1067
1068 enum tls_reloc {
1069 TLS_GD32,
1070 TLS_LDM32,
1071 TLS_LDO32,
1072 TLS_IE32,
1073 TLS_LE32,
1074 TLS_DESCSEQ /* GNU scheme */
1075 };
1076
1077 /* The maximum number of insns to be used when loading a constant. */
1078 inline static int
1079 arm_constant_limit (bool size_p)
1080 {
1081 return size_p ? 1 : current_tune->constant_limit;
1082 }
1083
1084 /* Emit an insn that's a simple single-set. Both the operands must be known
1085 to be valid. */
1086 inline static rtx
1087 emit_set_insn (rtx x, rtx y)
1088 {
1089 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1090 }
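/* For example, emit_set_insn (target_reg, GEN_INT (42)) emits the single
   insn (set (reg) (const_int 42)).  Callers must have validated both
   operands beforehand, as noted above.  */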
1091
1092 /* Return the number of bits set in VALUE. */
1093 static unsigned
1094 bit_count (unsigned long value)
1095 {
1096 unsigned long count = 0;
1097
1098 while (value)
1099 {
1100 count++;
1101 value &= value - 1; /* Clear the least-significant set bit. */
1102 }
1103
1104 return count;
1105 }
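/* Example: for VALUE 0x13 (binary 10011) the loop runs three times,
   clearing one set bit per iteration (0x13 -> 0x12 -> 0x10 -> 0), and the
   function returns 3.  */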
1106
1107 typedef struct
1108 {
1109 enum machine_mode mode;
1110 const char *name;
1111 } arm_fixed_mode_set;
1112
1113 /* A small helper for setting fixed-point library libfuncs. */
1114
1115 static void
1116 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1117 const char *funcname, const char *modename,
1118 int num_suffix)
1119 {
1120 char buffer[50];
1121
1122 if (num_suffix == 0)
1123 sprintf (buffer, "__gnu_%s%s", funcname, modename);
1124 else
1125 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
1126
1127 set_optab_libfunc (optable, mode, buffer);
1128 }
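/* For example, the call below with (add_optab, QQmode, "add", "qq", 3)
   registers the libcall name "__gnu_addqq3"; a num_suffix of 0 omits the
   trailing digit altogether.  */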
1129
1130 static void
1131 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
1132 enum machine_mode from, const char *funcname,
1133 const char *toname, const char *fromname)
1134 {
1135 char buffer[50];
1136 const char *maybe_suffix_2 = "";
1137
1138 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1139 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
1140 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
1141 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
1142 maybe_suffix_2 = "2";
1143
1144 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
1145 maybe_suffix_2);
1146
1147 set_conv_libfunc (optable, to, from, buffer);
1148 }
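/* For example, a fract conversion from SQmode to DFmode is registered as
   "__gnu_fractsqdf" (no "2" suffix, because DFmode is not a fixed-point
   mode), whereas SQmode to DQmode becomes "__gnu_fractsqdq2", matching
   the fixed-bit.h naming convention mentioned above.  */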
1149
1150 /* Set up library functions unique to ARM. */
1151
1152 static void
1153 arm_init_libfuncs (void)
1154 {
1155 /* For Linux, we have access to kernel support for atomic operations. */
1156 if (arm_abi == ARM_ABI_AAPCS_LINUX)
1157 init_sync_libfuncs (2 * UNITS_PER_WORD);
1158
1159 /* There are no special library functions unless we are using the
1160 ARM BPABI. */
1161 if (!TARGET_BPABI)
1162 return;
1163
1164 /* The functions below are described in Section 4 of the "Run-Time
1165 ABI for the ARM architecture", Version 1.0. */
1166
1167 /* Double-precision floating-point arithmetic. Table 2. */
1168 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1169 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1170 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1171 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1172 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1173
1174 /* Double-precision comparisons. Table 3. */
1175 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1176 set_optab_libfunc (ne_optab, DFmode, NULL);
1177 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1178 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1179 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1180 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1181 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1182
1183 /* Single-precision floating-point arithmetic. Table 4. */
1184 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1185 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1186 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1187 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1188 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1189
1190 /* Single-precision comparisons. Table 5. */
1191 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1192 set_optab_libfunc (ne_optab, SFmode, NULL);
1193 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1194 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1195 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1196 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1197 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1198
1199 /* Floating-point to integer conversions. Table 6. */
1200 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1201 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1202 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1203 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1204 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1205 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1206 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1207 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1208
1209 /* Conversions between floating types. Table 7. */
1210 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1211 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1212
1213 /* Integer to floating-point conversions. Table 8. */
1214 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1215 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1216 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1217 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1218 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1219 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1220 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1221 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1222
1223 /* Long long. Table 9. */
1224 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1225 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1226 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1227 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1228 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1229 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1230 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1231 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1232
1233 /* Integer (32/32->32) division. \S 4.3.1. */
1234 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1235 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1236
1237 /* The divmod functions are designed so that they can be used for
1238 plain division, even though they return both the quotient and the
1239 remainder. The quotient is returned in the usual location (i.e.,
1240 r0 for SImode, {r0, r1} for DImode), just as would be expected
1241 for an ordinary division routine. Because the AAPCS calling
1242 conventions specify that all of { r0, r1, r2, r3 } are
1243 call-clobbered registers, there is no need to tell the compiler
1244 explicitly that those registers are clobbered by these
1245 routines. */
1246 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1247 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
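/* Concretely, __aeabi_idivmod returns the SImode quotient in r0 and the
   remainder in r1, while __aeabi_ldivmod returns the DImode quotient in
   {r0, r1} and the remainder in {r2, r3}; plain division simply ignores
   the remainder part of the result.  */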
1248
1249 /* For SImode division the ABI provides div-without-mod routines,
1250 which are faster. */
1251 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1252 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1253
1254 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1255 divmod libcalls instead. */
1256 set_optab_libfunc (smod_optab, DImode, NULL);
1257 set_optab_libfunc (umod_optab, DImode, NULL);
1258 set_optab_libfunc (smod_optab, SImode, NULL);
1259 set_optab_libfunc (umod_optab, SImode, NULL);
1260
1261 /* Half-precision float operations. The compiler handles all operations
1262 with NULL libfuncs by converting to SFmode. */
1263 switch (arm_fp16_format)
1264 {
1265 case ARM_FP16_FORMAT_IEEE:
1266 case ARM_FP16_FORMAT_ALTERNATIVE:
1267
1268 /* Conversions. */
1269 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1270 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1271 ? "__gnu_f2h_ieee"
1272 : "__gnu_f2h_alternative"));
1273 set_conv_libfunc (sext_optab, SFmode, HFmode,
1274 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1275 ? "__gnu_h2f_ieee"
1276 : "__gnu_h2f_alternative"));
1277
1278 /* Arithmetic. */
1279 set_optab_libfunc (add_optab, HFmode, NULL);
1280 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1281 set_optab_libfunc (smul_optab, HFmode, NULL);
1282 set_optab_libfunc (neg_optab, HFmode, NULL);
1283 set_optab_libfunc (sub_optab, HFmode, NULL);
1284
1285 /* Comparisons. */
1286 set_optab_libfunc (eq_optab, HFmode, NULL);
1287 set_optab_libfunc (ne_optab, HFmode, NULL);
1288 set_optab_libfunc (lt_optab, HFmode, NULL);
1289 set_optab_libfunc (le_optab, HFmode, NULL);
1290 set_optab_libfunc (ge_optab, HFmode, NULL);
1291 set_optab_libfunc (gt_optab, HFmode, NULL);
1292 set_optab_libfunc (unord_optab, HFmode, NULL);
1293 break;
1294
1295 default:
1296 break;
1297 }
1298
1299 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
1300 {
1301 const arm_fixed_mode_set fixed_arith_modes[] =
1302 {
1303 { QQmode, "qq" },
1304 { UQQmode, "uqq" },
1305 { HQmode, "hq" },
1306 { UHQmode, "uhq" },
1307 { SQmode, "sq" },
1308 { USQmode, "usq" },
1309 { DQmode, "dq" },
1310 { UDQmode, "udq" },
1311 { TQmode, "tq" },
1312 { UTQmode, "utq" },
1313 { HAmode, "ha" },
1314 { UHAmode, "uha" },
1315 { SAmode, "sa" },
1316 { USAmode, "usa" },
1317 { DAmode, "da" },
1318 { UDAmode, "uda" },
1319 { TAmode, "ta" },
1320 { UTAmode, "uta" }
1321 };
1322 const arm_fixed_mode_set fixed_conv_modes[] =
1323 {
1324 { QQmode, "qq" },
1325 { UQQmode, "uqq" },
1326 { HQmode, "hq" },
1327 { UHQmode, "uhq" },
1328 { SQmode, "sq" },
1329 { USQmode, "usq" },
1330 { DQmode, "dq" },
1331 { UDQmode, "udq" },
1332 { TQmode, "tq" },
1333 { UTQmode, "utq" },
1334 { HAmode, "ha" },
1335 { UHAmode, "uha" },
1336 { SAmode, "sa" },
1337 { USAmode, "usa" },
1338 { DAmode, "da" },
1339 { UDAmode, "uda" },
1340 { TAmode, "ta" },
1341 { UTAmode, "uta" },
1342 { QImode, "qi" },
1343 { HImode, "hi" },
1344 { SImode, "si" },
1345 { DImode, "di" },
1346 { TImode, "ti" },
1347 { SFmode, "sf" },
1348 { DFmode, "df" }
1349 };
1350 unsigned int i, j;
1351
1352 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
1353 {
1354 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
1355 "add", fixed_arith_modes[i].name, 3);
1356 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
1357 "ssadd", fixed_arith_modes[i].name, 3);
1358 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
1359 "usadd", fixed_arith_modes[i].name, 3);
1360 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
1361 "sub", fixed_arith_modes[i].name, 3);
1362 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
1363 "sssub", fixed_arith_modes[i].name, 3);
1364 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
1365 "ussub", fixed_arith_modes[i].name, 3);
1366 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
1367 "mul", fixed_arith_modes[i].name, 3);
1368 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
1369 "ssmul", fixed_arith_modes[i].name, 3);
1370 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
1371 "usmul", fixed_arith_modes[i].name, 3);
1372 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
1373 "div", fixed_arith_modes[i].name, 3);
1374 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
1375 "udiv", fixed_arith_modes[i].name, 3);
1376 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
1377 "ssdiv", fixed_arith_modes[i].name, 3);
1378 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
1379 "usdiv", fixed_arith_modes[i].name, 3);
1380 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
1381 "neg", fixed_arith_modes[i].name, 2);
1382 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
1383 "ssneg", fixed_arith_modes[i].name, 2);
1384 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
1385 "usneg", fixed_arith_modes[i].name, 2);
1386 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
1387 "ashl", fixed_arith_modes[i].name, 3);
1388 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
1389 "ashr", fixed_arith_modes[i].name, 3);
1390 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
1391 "lshr", fixed_arith_modes[i].name, 3);
1392 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
1393 "ssashl", fixed_arith_modes[i].name, 3);
1394 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
1395 "usashl", fixed_arith_modes[i].name, 3);
1396 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
1397 "cmp", fixed_arith_modes[i].name, 2);
1398 }
1399
1400 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
1401 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
1402 {
1403 if (i == j
1404 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
1405 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
1406 continue;
1407
1408 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
1409 fixed_conv_modes[j].mode, "fract",
1410 fixed_conv_modes[i].name,
1411 fixed_conv_modes[j].name);
1412 arm_set_fixed_conv_libfunc (satfract_optab,
1413 fixed_conv_modes[i].mode,
1414 fixed_conv_modes[j].mode, "satfract",
1415 fixed_conv_modes[i].name,
1416 fixed_conv_modes[j].name);
1417 arm_set_fixed_conv_libfunc (fractuns_optab,
1418 fixed_conv_modes[i].mode,
1419 fixed_conv_modes[j].mode, "fractuns",
1420 fixed_conv_modes[i].name,
1421 fixed_conv_modes[j].name);
1422 arm_set_fixed_conv_libfunc (satfractuns_optab,
1423 fixed_conv_modes[i].mode,
1424 fixed_conv_modes[j].mode, "satfractuns",
1425 fixed_conv_modes[i].name,
1426 fixed_conv_modes[j].name);
1427 }
1428 }
1429
1430 if (TARGET_AAPCS_BASED)
1431 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1432 }
1433
1434 /* On AAPCS systems, this is the "struct __va_list". */
1435 static GTY(()) tree va_list_type;
1436
1437 /* Return the type to use as __builtin_va_list. */
1438 static tree
1439 arm_build_builtin_va_list (void)
1440 {
1441 tree va_list_name;
1442 tree ap_field;
1443
1444 if (!TARGET_AAPCS_BASED)
1445 return std_build_builtin_va_list ();
1446
1447 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1448 defined as:
1449
1450 struct __va_list
1451 {
1452 void *__ap;
1453 };
1454
1455 The C Library ABI further reinforces this definition in \S
1456 4.1.
1457
1458 We must follow this definition exactly. The structure tag
1459 name is visible in C++ mangled names, and thus forms a part
1460 of the ABI. The field name may be used by people who
1461 #include <stdarg.h>. */
1462 /* Create the type. */
1463 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1464 /* Give it the required name. */
1465 va_list_name = build_decl (BUILTINS_LOCATION,
1466 TYPE_DECL,
1467 get_identifier ("__va_list"),
1468 va_list_type);
1469 DECL_ARTIFICIAL (va_list_name) = 1;
1470 TYPE_NAME (va_list_type) = va_list_name;
1471 TYPE_STUB_DECL (va_list_type) = va_list_name;
1472 /* Create the __ap field. */
1473 ap_field = build_decl (BUILTINS_LOCATION,
1474 FIELD_DECL,
1475 get_identifier ("__ap"),
1476 ptr_type_node);
1477 DECL_ARTIFICIAL (ap_field) = 1;
1478 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1479 TYPE_FIELDS (va_list_type) = ap_field;
1480 /* Compute its layout. */
1481 layout_type (va_list_type);
1482
1483 return va_list_type;
1484 }
1485
1486 /* Return an expression of type "void *" pointing to the next
1487 available argument in a variable-argument list. VALIST is the
1488 user-level va_list object, of type __builtin_va_list. */
1489 static tree
1490 arm_extract_valist_ptr (tree valist)
1491 {
1492 if (TREE_TYPE (valist) == error_mark_node)
1493 return error_mark_node;
1494
1495 /* On an AAPCS target, the pointer is stored within "struct
1496 va_list". */
1497 if (TARGET_AAPCS_BASED)
1498 {
1499 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1500 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1501 valist, ap_field, NULL_TREE);
1502 }
1503
1504 return valist;
1505 }
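/* Illustrative example (user code, not part of this file): given

     void f (int n, ...)
     {
       va_list ap;
       va_start (ap, n);
       int x = va_arg (ap, int);
       va_end (ap);
     }

   on an AAPCS target "ap" is the one-field struct __va_list built above;
   va_start fills in ap.__ap and va_arg advances it.  The two hooks below
   simply peel off the wrapper struct with arm_extract_valist_ptr and then
   defer to the standard pointer-bumping expansions.  */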
1506
1507 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1508 static void
1509 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1510 {
1511 valist = arm_extract_valist_ptr (valist);
1512 std_expand_builtin_va_start (valist, nextarg);
1513 }
1514
1515 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1516 static tree
1517 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1518 gimple_seq *post_p)
1519 {
1520 valist = arm_extract_valist_ptr (valist);
1521 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1522 }
1523
1524 /* Fix up any incompatible options that the user has specified. */
1525 static void
1526 arm_option_override (void)
1527 {
1528 if (global_options_set.x_arm_arch_option)
1529 arm_selected_arch = &all_architectures[arm_arch_option];
1530
1531 if (global_options_set.x_arm_cpu_option)
1532 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
1533
1534 if (global_options_set.x_arm_tune_option)
1535 arm_selected_tune = &all_cores[(int) arm_tune_option];
1536
1537 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1538 SUBTARGET_OVERRIDE_OPTIONS;
1539 #endif
1540
1541 if (arm_selected_arch)
1542 {
1543 if (arm_selected_cpu)
1544 {
1545 /* Check for conflict between mcpu and march. */
1546 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1547 {
1548 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1549 arm_selected_cpu->name, arm_selected_arch->name);
1550 /* -march wins for code generation.
1551 -mcpu wins for default tuning. */
1552 if (!arm_selected_tune)
1553 arm_selected_tune = arm_selected_cpu;
1554
1555 arm_selected_cpu = arm_selected_arch;
1556 }
1557 else
1558 /* -mcpu wins. */
1559 arm_selected_arch = NULL;
1560 }
1561 else
1562 /* Pick a CPU based on the architecture. */
1563 arm_selected_cpu = arm_selected_arch;
1564 }
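  /* For example (illustrative): "-mcpu=arm7tdmi -march=armv6" triggers the
     conflict warning above; code is then generated for armv6 (-march wins)
     while arm7tdmi remains the default tuning target (-mcpu wins), as the
     comment above describes.  */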
1565
1566 /* If the user did not specify a processor, choose one for them. */
1567 if (!arm_selected_cpu)
1568 {
1569 const struct processors * sel;
1570 unsigned int sought;
1571
1572 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1573 if (!arm_selected_cpu->name)
1574 {
1575 #ifdef SUBTARGET_CPU_DEFAULT
1576 /* Use the subtarget default CPU if none was specified by
1577 configure. */
1578 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1579 #endif
1580 /* Default to ARM6. */
1581 if (!arm_selected_cpu->name)
1582 arm_selected_cpu = &all_cores[arm6];
1583 }
1584
1585 sel = arm_selected_cpu;
1586 insn_flags = sel->flags;
1587
1588 /* Now check to see if the user has specified any command-line
1589 switches that require certain abilities from the CPU. */
1590 sought = 0;
1591
1592 if (TARGET_INTERWORK || TARGET_THUMB)
1593 {
1594 sought |= (FL_THUMB | FL_MODE32);
1595
1596 /* There are no ARM processors that support both APCS-26 and
1597 interworking. Therefore we force FL_MODE26 to be removed
1598 from insn_flags here (if it was set), so that the search
1599 below will always be able to find a compatible processor. */
1600 insn_flags &= ~FL_MODE26;
1601 }
1602
1603 if (sought != 0 && ((sought & insn_flags) != sought))
1604 {
1605 /* Try to locate a CPU type that supports all of the abilities
1606 of the default CPU, plus the extra abilities requested by
1607 the user. */
1608 for (sel = all_cores; sel->name != NULL; sel++)
1609 if ((sel->flags & sought) == (sought | insn_flags))
1610 break;
1611
1612 if (sel->name == NULL)
1613 {
1614 unsigned current_bit_count = 0;
1615 const struct processors * best_fit = NULL;
1616
1617 /* Ideally we would like to issue an error message here
1618 saying that it was not possible to find a CPU compatible
1619 with the default CPU, but which also supports the command
1620 line options specified by the programmer, and so they
1621 ought to use the -mcpu=<name> command line option to
1622 override the default CPU type.
1623
1624 If we cannot find a cpu that has both the
1625 characteristics of the default cpu and the given
1626 command line options, we scan the array again looking
1627 for a best match. */
1628 for (sel = all_cores; sel->name != NULL; sel++)
1629 if ((sel->flags & sought) == sought)
1630 {
1631 unsigned count;
1632
1633 count = bit_count (sel->flags & insn_flags);
1634
1635 if (count >= current_bit_count)
1636 {
1637 best_fit = sel;
1638 current_bit_count = count;
1639 }
1640 }
1641
1642 gcc_assert (best_fit);
1643 sel = best_fit;
1644 }
1645
1646 arm_selected_cpu = sel;
1647 }
1648 }
1649
1650 gcc_assert (arm_selected_cpu);
1651 /* The selected cpu may be an architecture, so look up tuning by core ID. */
1652 if (!arm_selected_tune)
1653 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1654
1655 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1656 insn_flags = arm_selected_cpu->flags;
1657 arm_base_arch = arm_selected_cpu->base_arch;
1658
1659 arm_tune = arm_selected_tune->core;
1660 tune_flags = arm_selected_tune->flags;
1661 current_tune = arm_selected_tune->tune;
1662
1663 /* Make sure that the processor choice does not conflict with any of the
1664 other command line choices. */
1665 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1666 error ("target CPU does not support ARM mode");
1667
1668 /* BPABI targets use linker tricks to allow interworking on cores
1669 without thumb support. */
1670 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1671 {
1672 warning (0, "target CPU does not support interworking");
1673 target_flags &= ~MASK_INTERWORK;
1674 }
1675
1676 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1677 {
1678 warning (0, "target CPU does not support THUMB instructions");
1679 target_flags &= ~MASK_THUMB;
1680 }
1681
1682 if (TARGET_APCS_FRAME && TARGET_THUMB)
1683 {
1684 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1685 target_flags &= ~MASK_APCS_FRAME;
1686 }
1687
1688 /* Callee super interworking implies thumb interworking. Adding
1689 this to the flags here simplifies the logic elsewhere. */
1690 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1691 target_flags |= MASK_INTERWORK;
1692
1693 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1694 from here where no function is being compiled currently. */
1695 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1696 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1697
1698 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1699 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1700
1701 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1702 {
1703 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1704 target_flags |= MASK_APCS_FRAME;
1705 }
1706
1707 if (TARGET_POKE_FUNCTION_NAME)
1708 target_flags |= MASK_APCS_FRAME;
1709
1710 if (TARGET_APCS_REENT && flag_pic)
1711 error ("-fpic and -mapcs-reent are incompatible");
1712
1713 if (TARGET_APCS_REENT)
1714 warning (0, "APCS reentrant code not supported. Ignored");
1715
1716 /* If this target is normally configured to use APCS frames, warn if they
1717 are turned off and debugging is turned on. */
1718 if (TARGET_ARM
1719 && write_symbols != NO_DEBUG
1720 && !TARGET_APCS_FRAME
1721 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1722 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1723
1724 if (TARGET_APCS_FLOAT)
1725 warning (0, "passing floating point arguments in fp regs not yet supported");
1726
1727 if (TARGET_LITTLE_WORDS)
1728 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
1729 "will be removed in a future release");
1730
1731 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1732 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1733 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1734 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1735 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1736 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1737 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1738 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1739 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1740 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1741 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1742 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1743 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1744
1745 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1746 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1747 thumb_code = TARGET_ARM == 0;
1748 thumb1_code = TARGET_THUMB1 != 0;
1749 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1750 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1751 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1752 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
1753 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1754 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1755 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1756
1757 /* If we are not using the default (ARM mode) section anchor offset
1758 ranges, then set the correct ranges now. */
1759 if (TARGET_THUMB1)
1760 {
1761 /* Thumb-1 LDR instructions cannot have negative offsets.
1762 Permissible positive offset ranges are 5-bit (for byte loads),
1763 6-bit (for halfword loads), or 7-bit (for word loads).
1764 Empirical results suggest a 7-bit anchor range gives the best
1765 overall code size. */
1766 targetm.min_anchor_offset = 0;
1767 targetm.max_anchor_offset = 127;
1768 }
1769 else if (TARGET_THUMB2)
1770 {
1771 /* The minimum is set such that the total size of the block
1772 for a particular anchor is 248 + 1 + 4095 bytes, which is
1773 divisible by eight, ensuring natural spacing of anchors. */
1774 targetm.min_anchor_offset = -248;
1775 targetm.max_anchor_offset = 4095;
1776 }
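  /* Sanity check on the Thumb-2 figures above (illustrative):
     248 + 1 + 4095 = 4344 = 8 * 543, so the anchor block size is indeed
     a multiple of eight.  */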
1777
1778 /* V5 code we generate is completely interworking capable, so we turn off
1779 TARGET_INTERWORK here to avoid many tests later on. */
1780
1781 /* XXX However, we must pass the right pre-processor defines to CPP
1782 or GLD can get confused. This is a hack. */
1783 if (TARGET_INTERWORK)
1784 arm_cpp_interwork = 1;
1785
1786 if (arm_arch5)
1787 target_flags &= ~MASK_INTERWORK;
1788
1789 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1790 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1791
1792 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1793 error ("iwmmxt abi requires an iwmmxt capable cpu");
1794
1795 if (!global_options_set.x_arm_fpu_index)
1796 {
1797 const char *target_fpu_name;
1798 bool ok;
1799
1800 #ifdef FPUTYPE_DEFAULT
1801 target_fpu_name = FPUTYPE_DEFAULT;
1802 #else
1803 target_fpu_name = "vfp";
1804 #endif
1805
1806 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1807 CL_TARGET);
1808 gcc_assert (ok);
1809 }
1810
1811 arm_fpu_desc = &all_fpus[arm_fpu_index];
1812
1813 switch (arm_fpu_desc->model)
1814 {
1815 case ARM_FP_MODEL_VFP:
1816 arm_fpu_attr = FPU_VFP;
1817 break;
1818
1819 default:
1820 gcc_unreachable();
1821 }
1822
1823 if (TARGET_AAPCS_BASED)
1824 {
1825 if (TARGET_CALLER_INTERWORKING)
1826 error ("AAPCS does not support -mcaller-super-interworking");
1827 else
1828 if (TARGET_CALLEE_INTERWORKING)
1829 error ("AAPCS does not support -mcallee-super-interworking");
1830 }
1831
1832 /* iWMMXt and NEON are incompatible. */
1833 if (TARGET_IWMMXT && TARGET_NEON)
1834 error ("iWMMXt and NEON are incompatible");
1835
1836 /* iWMMXt unsupported under Thumb mode. */
1837 if (TARGET_THUMB && TARGET_IWMMXT)
1838 error ("iWMMXt unsupported under Thumb mode");
1839
1840 /* __fp16 support currently assumes the core has ldrh. */
1841 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1842 sorry ("__fp16 and no ldrh");
1843
1844 /* If soft-float is specified then don't use FPU. */
1845 if (TARGET_SOFT_FLOAT)
1846 arm_fpu_attr = FPU_NONE;
1847
1848 if (TARGET_AAPCS_BASED)
1849 {
1850 if (arm_abi == ARM_ABI_IWMMXT)
1851 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1852 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1853 && TARGET_HARD_FLOAT
1854 && TARGET_VFP)
1855 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1856 else
1857 arm_pcs_default = ARM_PCS_AAPCS;
1858 }
1859 else
1860 {
1861 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1862 sorry ("-mfloat-abi=hard and VFP");
1863
1864 if (arm_abi == ARM_ABI_APCS)
1865 arm_pcs_default = ARM_PCS_APCS;
1866 else
1867 arm_pcs_default = ARM_PCS_ATPCS;
1868 }
1869
1870 /* For arm2/3 there is no need to do any scheduling if we are doing
1871 software floating-point. */
1872 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
1873 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1874
1875 /* Use the cp15 method if it is available. */
1876 if (target_thread_pointer == TP_AUTO)
1877 {
1878 if (arm_arch6k && !TARGET_THUMB1)
1879 target_thread_pointer = TP_CP15;
1880 else
1881 target_thread_pointer = TP_SOFT;
1882 }
1883
1884 if (TARGET_HARD_TP && TARGET_THUMB1)
1885 error ("can not use -mtp=cp15 with 16-bit Thumb");
1886
1887 /* Override the default structure alignment for AAPCS ABI. */
1888 if (!global_options_set.x_arm_structure_size_boundary)
1889 {
1890 if (TARGET_AAPCS_BASED)
1891 arm_structure_size_boundary = 8;
1892 }
1893 else
1894 {
1895 if (arm_structure_size_boundary != 8
1896 && arm_structure_size_boundary != 32
1897 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
1898 {
1899 if (ARM_DOUBLEWORD_ALIGN)
1900 warning (0,
1901 "structure size boundary can only be set to 8, 32 or 64");
1902 else
1903 warning (0, "structure size boundary can only be set to 8 or 32");
1904 arm_structure_size_boundary
1905 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
1906 }
1907 }
1908
1909 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1910 {
1911 error ("RTP PIC is incompatible with Thumb");
1912 flag_pic = 0;
1913 }
1914
1915 /* If stack checking is disabled, we can use r10 as the PIC register,
1916 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1917 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1918 {
1919 if (TARGET_VXWORKS_RTP)
1920 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1921 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1922 }
1923
1924 if (flag_pic && TARGET_VXWORKS_RTP)
1925 arm_pic_register = 9;
1926
1927 if (arm_pic_register_string != NULL)
1928 {
1929 int pic_register = decode_reg_name (arm_pic_register_string);
1930
1931 if (!flag_pic)
1932 warning (0, "-mpic-register= is useless without -fpic");
1933
1934 /* Prevent the user from choosing an obviously stupid PIC register. */
1935 else if (pic_register < 0 || call_used_regs[pic_register]
1936 || pic_register == HARD_FRAME_POINTER_REGNUM
1937 || pic_register == STACK_POINTER_REGNUM
1938 || pic_register >= PC_REGNUM
1939 || (TARGET_VXWORKS_RTP
1940 && (unsigned int) pic_register != arm_pic_register))
1941 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1942 else
1943 arm_pic_register = pic_register;
1944 }
1945
1946 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1947 if (fix_cm3_ldrd == 2)
1948 {
1949 if (arm_selected_cpu->core == cortexm3)
1950 fix_cm3_ldrd = 1;
1951 else
1952 fix_cm3_ldrd = 0;
1953 }
1954
1955 /* Enable -munaligned-access by default for
1956 - all ARMv6 architecture-based processors
1957 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
1958
1959 Disable -munaligned-access by default for
1960 - all pre-ARMv6 architecture-based processors
1961 - ARMv6-M architecture-based processors. */
1962
1963 if (unaligned_access == 2)
1964 {
1965 if (arm_arch6 && (arm_arch_notm || arm_arch7))
1966 unaligned_access = 1;
1967 else
1968 unaligned_access = 0;
1969 }
1970 else if (unaligned_access == 1
1971 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
1972 {
1973 warning (0, "target CPU does not support unaligned accesses");
1974 unaligned_access = 0;
1975 }
1976
1977 if (TARGET_THUMB1 && flag_schedule_insns)
1978 {
1979 /* Don't warn since it's on by default in -O2. */
1980 flag_schedule_insns = 0;
1981 }
1982
1983 if (optimize_size)
1984 {
1985 /* If optimizing for size, bump the number of instructions that we
1986 are prepared to conditionally execute (even on a StrongARM). */
1987 max_insns_skipped = 6;
1988 }
1989 else
1990 max_insns_skipped = current_tune->max_insns_skipped;
1991
1992 /* Hot/Cold partitioning is not currently supported, since we can't
1993 handle literal pool placement in that case. */
1994 if (flag_reorder_blocks_and_partition)
1995 {
1996 inform (input_location,
1997 "-freorder-blocks-and-partition not supported on this architecture");
1998 flag_reorder_blocks_and_partition = 0;
1999 flag_reorder_blocks = 1;
2000 }
2001
2002 if (flag_pic)
2003 /* Hoisting PIC address calculations more aggressively provides a small,
2004 but measurable, size reduction for PIC code. Therefore, we decrease
2005 the bar for unrestricted expression hoisting to the cost of PIC address
2006 calculation, which is 2 instructions. */
2007 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2008 global_options.x_param_values,
2009 global_options_set.x_param_values);
2010
2011 /* ARM EABI defaults to strict volatile bitfields. */
2012 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2013 && abi_version_at_least(2))
2014 flag_strict_volatile_bitfields = 1;
2015
2016 /* Enable SW prefetching at -O3 for CPUs that have prefetch, and for which
2017 we have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2018 if (flag_prefetch_loop_arrays < 0
2019 && HAVE_prefetch
2020 && optimize >= 3
2021 && current_tune->num_prefetch_slots > 0)
2022 flag_prefetch_loop_arrays = 1;
2023
2024 /* Set up parameters to be used in the prefetching algorithm. Do not override
2025 the defaults unless we are tuning for a core we have researched values for. */
2026 if (current_tune->num_prefetch_slots > 0)
2027 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2028 current_tune->num_prefetch_slots,
2029 global_options.x_param_values,
2030 global_options_set.x_param_values);
2031 if (current_tune->l1_cache_line_size >= 0)
2032 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2033 current_tune->l1_cache_line_size,
2034 global_options.x_param_values,
2035 global_options_set.x_param_values);
2036 if (current_tune->l1_cache_size >= 0)
2037 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2038 current_tune->l1_cache_size,
2039 global_options.x_param_values,
2040 global_options_set.x_param_values);
2041
2042 /* Use the alternative scheduling-pressure algorithm by default. */
2043 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2044 global_options.x_param_values,
2045 global_options_set.x_param_values);
2046
2047 /* Register global variables with the garbage collector. */
2048 arm_add_gc_roots ();
2049 }
2050
2051 static void
2052 arm_add_gc_roots (void)
2053 {
2054 gcc_obstack_init(&minipool_obstack);
2055 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2056 }
2057 \f
2058 /* A table of known ARM exception types.
2059 For use with the interrupt function attribute. */
2060
2061 typedef struct
2062 {
2063 const char *const arg;
2064 const unsigned long return_value;
2065 }
2066 isr_attribute_arg;
2067
2068 static const isr_attribute_arg isr_attribute_args [] =
2069 {
2070 { "IRQ", ARM_FT_ISR },
2071 { "irq", ARM_FT_ISR },
2072 { "FIQ", ARM_FT_FIQ },
2073 { "fiq", ARM_FT_FIQ },
2074 { "ABORT", ARM_FT_ISR },
2075 { "abort", ARM_FT_ISR },
2076 { "ABORT", ARM_FT_ISR },
2077 { "abort", ARM_FT_ISR },
2078 { "UNDEF", ARM_FT_EXCEPTION },
2079 { "undef", ARM_FT_EXCEPTION },
2080 { "SWI", ARM_FT_EXCEPTION },
2081 { "swi", ARM_FT_EXCEPTION },
2082 { NULL, ARM_FT_NORMAL }
2083 };
2084
2085 /* Returns the (interrupt) function type of the current
2086 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2087
2088 static unsigned long
2089 arm_isr_value (tree argument)
2090 {
2091 const isr_attribute_arg * ptr;
2092 const char * arg;
2093
2094 if (!arm_arch_notm)
2095 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2096
2097 /* No argument - default to IRQ. */
2098 if (argument == NULL_TREE)
2099 return ARM_FT_ISR;
2100
2101 /* Get the value of the argument. */
2102 if (TREE_VALUE (argument) == NULL_TREE
2103 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2104 return ARM_FT_UNKNOWN;
2105
2106 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2107
2108 /* Check it against the list of known arguments. */
2109 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2110 if (streq (arg, ptr->arg))
2111 return ptr->return_value;
2112
2113 /* An unrecognized interrupt type. */
2114 return ARM_FT_UNKNOWN;
2115 }
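/* Illustrative usage (user code, not part of this file):

     void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);

   arm_isr_value receives the string "IRQ" and maps it to ARM_FT_ISR via
   the table above; with no argument at all the function likewise defaults
   to ARM_FT_ISR.  */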
2116
2117 /* Computes the type of the current function. */
2118
2119 static unsigned long
2120 arm_compute_func_type (void)
2121 {
2122 unsigned long type = ARM_FT_UNKNOWN;
2123 tree a;
2124 tree attr;
2125
2126 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2127
2128 /* Decide if the current function is volatile. Such functions
2129 never return, and many memory cycles can be saved by not storing
2130 register values that will never be needed again. This optimization
2131 was added to speed up context switching in a kernel application. */
2132 if (optimize > 0
2133 && (TREE_NOTHROW (current_function_decl)
2134 || !(flag_unwind_tables
2135 || (flag_exceptions
2136 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2137 && TREE_THIS_VOLATILE (current_function_decl))
2138 type |= ARM_FT_VOLATILE;
2139
2140 if (cfun->static_chain_decl != NULL)
2141 type |= ARM_FT_NESTED;
2142
2143 attr = DECL_ATTRIBUTES (current_function_decl);
2144
2145 a = lookup_attribute ("naked", attr);
2146 if (a != NULL_TREE)
2147 type |= ARM_FT_NAKED;
2148
2149 a = lookup_attribute ("isr", attr);
2150 if (a == NULL_TREE)
2151 a = lookup_attribute ("interrupt", attr);
2152
2153 if (a == NULL_TREE)
2154 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2155 else
2156 type |= arm_isr_value (TREE_VALUE (a));
2157
2158 return type;
2159 }
2160
2161 /* Returns the type of the current function. */
2162
2163 unsigned long
2164 arm_current_func_type (void)
2165 {
2166 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2167 cfun->machine->func_type = arm_compute_func_type ();
2168
2169 return cfun->machine->func_type;
2170 }
2171
2172 bool
2173 arm_allocate_stack_slots_for_args (void)
2174 {
2175 /* Naked functions should not allocate stack slots for arguments. */
2176 return !IS_NAKED (arm_current_func_type ());
2177 }
2178
2179 static bool
2180 arm_warn_func_return (tree decl)
2181 {
2182 /* Naked functions are implemented entirely in assembly, including the
2183 return sequence, so suppress warnings about this. */
2184 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
2185 }
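/* For example (illustrative): a function declared as

     void __attribute__ ((naked)) start (void) { __asm__ ("b  main"); }

   gets ARM_FT_NAKED in its func_type, so no stack slots are allocated for
   its arguments and no missing-return warning is issued; its body is
   expected to consist entirely of assembly.  */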
2186
2187 \f
2188 /* Output assembler code for a block containing the constant parts
2189 of a trampoline, leaving space for the variable parts.
2190
2191 On the ARM, (if r8 is the static chain regnum, and remembering that
2192 referencing pc adds an offset of 8) the trampoline looks like:
2193 ldr r8, [pc, #0]
2194 ldr pc, [pc]
2195 .word static chain value
2196 .word function's address
2197 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2198
2199 static void
2200 arm_asm_trampoline_template (FILE *f)
2201 {
2202 if (TARGET_ARM)
2203 {
2204 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2205 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2206 }
2207 else if (TARGET_THUMB2)
2208 {
2209 /* The Thumb-2 trampoline is similar to the ARM implementation.
2210 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
2211 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2212 STATIC_CHAIN_REGNUM, PC_REGNUM);
2213 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2214 }
2215 else
2216 {
2217 ASM_OUTPUT_ALIGN (f, 2);
2218 fprintf (f, "\t.code\t16\n");
2219 fprintf (f, ".Ltrampoline_start:\n");
2220 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2221 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2222 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2223 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2224 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2225 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2226 }
2227 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2228 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2229 }
2230
2231 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2232
2233 static void
2234 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2235 {
2236 rtx fnaddr, mem, a_tramp;
2237
2238 emit_block_move (m_tramp, assemble_trampoline_template (),
2239 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2240
2241 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2242 emit_move_insn (mem, chain_value);
2243
2244 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2245 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2246 emit_move_insn (mem, fnaddr);
2247
2248 a_tramp = XEXP (m_tramp, 0);
2249 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2250 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2251 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
2252 }
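/* Illustrative layout of an initialized ARM-mode trampoline (offsets in
   bytes; assumes a 32-bit target, where the two data words are written at
   offsets 8 and 12 above, and "rS" stands for STATIC_CHAIN_REGNUM):

     0:  ldr  rS, [pc, #0]   @ pc reads as .+8, so this loads the word at 8
     4:  ldr  pc, [pc]       @ loads the word at 12 and branches there
     8:  .word <static chain value>
    12:  .word <target function address>

   The __clear_cache call above then makes the freshly written words
   visible to instruction fetch.  */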
2253
2254 /* Thumb trampolines should be entered in thumb mode, so set
2255 the bottom bit of the address. */
2256
2257 static rtx
2258 arm_trampoline_adjust_address (rtx addr)
2259 {
2260 if (TARGET_THUMB)
2261 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2262 NULL, 0, OPTAB_LIB_WIDEN);
2263 return addr;
2264 }
2265 \f
2266 /* Return 1 if it is possible to return using a single instruction.
2267 If SIBLING is non-null, this is a test for a return before a sibling
2268 call. SIBLING is the call insn, so we can examine its register usage. */
2269
2270 int
2271 use_return_insn (int iscond, rtx sibling)
2272 {
2273 int regno;
2274 unsigned int func_type;
2275 unsigned long saved_int_regs;
2276 unsigned HOST_WIDE_INT stack_adjust;
2277 arm_stack_offsets *offsets;
2278
2279 /* Never use a return instruction before reload has run. */
2280 if (!reload_completed)
2281 return 0;
2282
2283 func_type = arm_current_func_type ();
2284
2285 /* Naked, volatile and stack alignment functions need special
2286 consideration. */
2287 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2288 return 0;
2289
2290 /* So do interrupt functions that use the frame pointer and Thumb
2291 interrupt functions. */
2292 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2293 return 0;
2294
2295 offsets = arm_get_frame_offsets ();
2296 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2297
2298 /* As do variadic functions. */
2299 if (crtl->args.pretend_args_size
2300 || cfun->machine->uses_anonymous_args
2301 /* Or if the function calls __builtin_eh_return () */
2302 || crtl->calls_eh_return
2303 /* Or if the function calls alloca */
2304 || cfun->calls_alloca
2305 /* Or if there is a stack adjustment. However, if the stack pointer
2306 is saved on the stack, we can use a pre-incrementing stack load. */
2307 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2308 && stack_adjust == 4)))
2309 return 0;
2310
2311 saved_int_regs = offsets->saved_regs_mask;
2312
2313 /* Unfortunately, the insn
2314
2315 ldmib sp, {..., sp, ...}
2316
2317 triggers a bug on most SA-110 based devices, such that the stack
2318 pointer won't be correctly restored if the instruction takes a
2319 page fault. We work around this problem by popping r3 along with
2320 the other registers, since that is never slower than executing
2321 another instruction.
2322
2323 We test for !arm_arch5 here, because code for any architecture
2324 less than this could potentially be run on one of the buggy
2325 chips. */
2326 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2327 {
2328 /* Validate that r3 is a call-clobbered register (always true in
2329 the default abi) ... */
2330 if (!call_used_regs[3])
2331 return 0;
2332
2333 /* ... that it isn't being used for a return value ... */
2334 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2335 return 0;
2336
2337 /* ... or for a tail-call argument ... */
2338 if (sibling)
2339 {
2340 gcc_assert (CALL_P (sibling));
2341
2342 if (find_regno_fusage (sibling, USE, 3))
2343 return 0;
2344 }
2345
2346 /* ... and that there are no call-saved registers in r0-r2
2347 (always true in the default ABI). */
2348 if (saved_int_regs & 0x7)
2349 return 0;
2350 }
2351
2352 /* Can't be done if interworking with Thumb, and any registers have been
2353 stacked. */
2354 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2355 return 0;
2356
2357 /* On StrongARM, conditional returns are expensive if they aren't
2358 taken and multiple registers have been stacked. */
2359 if (iscond && arm_tune_strongarm)
2360 {
2361 /* Conditional return when just the LR is stored is a simple
2362 conditional-load instruction; that's not expensive. */
2363 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2364 return 0;
2365
2366 if (flag_pic
2367 && arm_pic_register != INVALID_REGNUM
2368 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2369 return 0;
2370 }
2371
2372 /* If there are saved registers but the LR isn't saved, then we need
2373 two instructions for the return. */
2374 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2375 return 0;
2376
2377 /* Can't be done if any of the VFP regs are pushed,
2378 since this also requires an insn. */
2379 if (TARGET_HARD_FLOAT && TARGET_VFP)
2380 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2381 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2382 return 0;
2383
2384 if (TARGET_REALLY_IWMMXT)
2385 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2386 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2387 return 0;
2388
2389 return 1;
2390 }
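/* For example (illustrative): a simple leaf such as

     int add (int a, int b) { return a + b; }

   saves no registers and needs no stack adjustment, so use_return_insn
   returns 1 and the entire epilogue can be a single return instruction
   (e.g. "bx lr" on cores that support it).  */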
2391
2392 /* Return TRUE if int I is a valid immediate ARM constant. */
2393
2394 int
2395 const_ok_for_arm (HOST_WIDE_INT i)
2396 {
2397 int lowbit;
2398
2399 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2400 be all zero, or all one. */
2401 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2402 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2403 != ((~(unsigned HOST_WIDE_INT) 0)
2404 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2405 return FALSE;
2406
2407 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2408
2409 /* Fast return for 0 and small values. We must do this for zero, since
2410 the code below can't handle that one case. */
2411 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2412 return TRUE;
2413
2414 /* Get the number of trailing zeros. */
2415 lowbit = ffs((int) i) - 1;
2416
2417 /* Only even shifts are allowed in ARM mode so round down to the
2418 nearest even number. */
2419 if (TARGET_ARM)
2420 lowbit &= ~1;
2421
2422 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2423 return TRUE;
2424
2425 if (TARGET_ARM)
2426 {
2427 /* Allow rotated constants in ARM mode. */
2428 if (lowbit <= 4
2429 && ((i & ~0xc000003f) == 0
2430 || (i & ~0xf000000f) == 0
2431 || (i & ~0xfc000003) == 0))
2432 return TRUE;
2433 }
2434 else
2435 {
2436 HOST_WIDE_INT v;
2437
2438 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2439 v = i & 0xff;
2440 v |= v << 16;
2441 if (i == v || i == (v | (v << 8)))
2442 return TRUE;
2443
2444 /* Allow repeated pattern 0xXY00XY00. */
2445 v = i & 0xff00;
2446 v |= v << 16;
2447 if (i == v)
2448 return TRUE;
2449 }
2450
2451 return FALSE;
2452 }
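/* Worked examples (illustrative):
     0x000000ff   valid everywhere (fits in the low 8 bits).
     0xff000000   valid in ARM mode: 0xff rotated right by 8.
     0x000001fe   0xff << 1; invalid in ARM mode (rotations must be even)
                  but valid for Thumb-2, which allows arbitrary shifts.
     0x00ff00ff   not a rotated 8-bit value, but valid for Thumb-2 as the
                  replicated pattern 0x00XY00XY.  */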
2453
2454 /* Return true if I is a valid constant for the operation CODE. */
2455 int
2456 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2457 {
2458 if (const_ok_for_arm (i))
2459 return 1;
2460
2461 switch (code)
2462 {
2463 case SET:
2464 /* See if we can use movw. */
2465 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2466 return 1;
2467 else
2468 /* Otherwise, try mvn. */
2469 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2470
2471 case PLUS:
2472 /* See if we can use addw or subw. */
2473 if (TARGET_THUMB2
2474 && ((i & 0xfffff000) == 0
2475 || ((-i) & 0xfffff000) == 0))
2476 return 1;
2477 /* else fall through. */
2478
2479 case COMPARE:
2480 case EQ:
2481 case NE:
2482 case GT:
2483 case LE:
2484 case LT:
2485 case GE:
2486 case GEU:
2487 case LTU:
2488 case GTU:
2489 case LEU:
2490 case UNORDERED:
2491 case ORDERED:
2492 case UNEQ:
2493 case UNGE:
2494 case UNLT:
2495 case UNGT:
2496 case UNLE:
2497 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2498
2499 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2500 case XOR:
2501 return 0;
2502
2503 case IOR:
2504 if (TARGET_THUMB2)
2505 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2506 return 0;
2507
2508 case AND:
2509 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2510
2511 default:
2512 gcc_unreachable ();
2513 }
2514 }
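/* For example (illustrative): the constant 0xffffff00 (-256) is not a
   valid ARM immediate, but const_ok_for_op accepts it for PLUS because
   the negated value 256 is, so "x + 0xffffff00" can be emitted as a
   subtraction of 256.  */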
2515
2516 /* Return true if I is a valid di mode constant for the operation CODE. */
2517 int
2518 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
2519 {
2520 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
2521 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
2522 rtx hi = GEN_INT (hi_val);
2523 rtx lo = GEN_INT (lo_val);
2524
2525 if (TARGET_THUMB1)
2526 return 0;
2527
2528 switch (code)
2529 {
2530 case PLUS:
2531 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
2532
2533 default:
2534 return 0;
2535 }
2536 }
2537
2538 /* Emit a sequence of insns to handle a large constant.
2539 CODE is the code of the operation required, it can be any of SET, PLUS,
2540 IOR, AND, XOR, MINUS;
2541 MODE is the mode in which the operation is being performed;
2542 VAL is the integer to operate on;
2543 SOURCE is the other operand (a register, or a null-pointer for SET);
2544 SUBTARGETS means it is safe to create scratch registers if that will
2545 either produce a simpler sequence or allow the values to be CSEd.
2546 The return value is the number of insns emitted. */
2547
2548 /* ??? Tweak this for thumb2. */
2549 int
2550 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2551 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2552 {
2553 rtx cond;
2554
2555 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2556 cond = COND_EXEC_TEST (PATTERN (insn));
2557 else
2558 cond = NULL_RTX;
2559
2560 if (subtargets || code == SET
2561 || (REG_P (target) && REG_P (source)
2562 && REGNO (target) != REGNO (source)))
2563 {
2564 /* After arm_reorg has been called, we can't fix up expensive
2565 constants by pushing them into memory so we must synthesize
2566 them in-line, regardless of the cost. This is only likely to
2567 be more costly on chips that have load delay slots and we are
2568 compiling without running the scheduler (so no splitting
2569 occurred before the final instruction emission).
2570
2571 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2572 */
2573 if (!after_arm_reorg
2574 && !cond
2575 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2576 1, 0)
2577 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2578 + (code != SET))))
2579 {
2580 if (code == SET)
2581 {
2582 /* Currently SET is the only monadic value for CODE; all
2583 the rest are dyadic. */
2584 if (TARGET_USE_MOVT)
2585 arm_emit_movpair (target, GEN_INT (val));
2586 else
2587 emit_set_insn (target, GEN_INT (val));
2588
2589 return 1;
2590 }
2591 else
2592 {
2593 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2594
2595 if (TARGET_USE_MOVT)
2596 arm_emit_movpair (temp, GEN_INT (val));
2597 else
2598 emit_set_insn (temp, GEN_INT (val));
2599
2600 /* For MINUS, the value is subtracted from, since we never
2601 have subtraction of a constant. */
2602 if (code == MINUS)
2603 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2604 else
2605 emit_set_insn (target,
2606 gen_rtx_fmt_ee (code, mode, source, temp));
2607 return 2;
2608 }
2609 }
2610 }
2611
2612 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2613 1);
2614 }
2615
2616 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
2617 ARM/THUMB2 immediates and add up to VAL.
2618 The function return value gives the number of insns required. */
2619 static int
2620 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2621 struct four_ints *return_sequence)
2622 {
2623 int best_consecutive_zeros = 0;
2624 int i;
2625 int best_start = 0;
2626 int insns1, insns2;
2627 struct four_ints tmp_sequence;
2628
2629 /* If we aren't targeting ARM, the best place to start is always at
2630 the bottom, otherwise look more closely. */
2631 if (TARGET_ARM)
2632 {
2633 for (i = 0; i < 32; i += 2)
2634 {
2635 int consecutive_zeros = 0;
2636
2637 if (!(val & (3 << i)))
2638 {
2639 while ((i < 32) && !(val & (3 << i)))
2640 {
2641 consecutive_zeros += 2;
2642 i += 2;
2643 }
2644 if (consecutive_zeros > best_consecutive_zeros)
2645 {
2646 best_consecutive_zeros = consecutive_zeros;
2647 best_start = i - consecutive_zeros;
2648 }
2649 i -= 2;
2650 }
2651 }
2652 }
2653
2654 /* So long as it won't require any more insns to do so, it's
2655 desirable to emit a small constant (in bits 0...9) in the last
2656 insn. This way there is more chance that it can be combined with
2657 a later addressing insn to form a pre-indexed load or store
2658 operation. Consider:
2659
2660 *((volatile int *)0xe0000100) = 1;
2661 *((volatile int *)0xe0000110) = 2;
2662
2663 We want this to wind up as:
2664
2665 mov rA, #0xe0000000
2666 mov rB, #1
2667 str rB, [rA, #0x100]
2668 mov rB, #2
2669 str rB, [rA, #0x110]
2670
2671 rather than having to synthesize both large constants from scratch.
2672
2673 Therefore, we calculate how many insns would be required to emit
2674 the constant starting from `best_start', and also starting from
2675 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2676 yield a shorter sequence, we may as well use zero. */
2677 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
2678 if (best_start != 0
2679 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
2680 {
2681 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
2682 if (insns2 <= insns1)
2683 {
2684 *return_sequence = tmp_sequence;
2685 insns1 = insns2;
2686 }
2687 }
2688
2689 return insns1;
2690 }
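/* Worked example (illustrative): for code == SET and val == 0x00ffff00 no
   single immediate works, but the two rotated 8-bit immediates 0x00ff0000
   and 0x0000ff00 do, so RETURN_SEQUENCE holds those two values and the
   function returns 2 (a mov of the first value followed by an add of the
   second, since the helper switches CODE from SET to PLUS after the first
   insn).  */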
2691
2692 /* As for optimal_immediate_sequence, but starting at bit-position I. */
2693 static int
2694 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2695 struct four_ints *return_sequence, int i)
2696 {
2697 int remainder = val & 0xffffffff;
2698 int insns = 0;
2699
2700 /* Try to find a way of doing the job in either two or three
2701 instructions.
2702
2703 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2704 location. We start at position I. This may be the MSB, or
2705 optimal_immediate_sequence may have positioned it at the largest block
2706 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2707 wrapping around to the top of the word when we drop off the bottom.
2708 In the worst case this code should produce no more than four insns.
2709
2710 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
2711 constants, shifted to any arbitrary location. We should always start
2712 at the MSB. */
2713 do
2714 {
2715 int end;
2716 unsigned int b1, b2, b3, b4;
2717 unsigned HOST_WIDE_INT result;
2718 int loc;
2719
2720 gcc_assert (insns < 4);
2721
2722 if (i <= 0)
2723 i += 32;
2724
2725 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
2726 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2727 {
2728 loc = i;
2729 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2730 /* We can use addw/subw for the last 12 bits. */
2731 result = remainder;
2732 else
2733 {
2734 /* Use an 8-bit shifted/rotated immediate. */
2735 end = i - 8;
2736 if (end < 0)
2737 end += 32;
2738 result = remainder & ((0x0ff << end)
2739 | ((i < end) ? (0xff >> (32 - end))
2740 : 0));
2741 i -= 8;
2742 }
2743 }
2744 else
2745 {
2746 /* Arm allows rotates by a multiple of two. Thumb-2 allows
2747 arbitrary shifts. */
2748 i -= TARGET_ARM ? 2 : 1;
2749 continue;
2750 }
2751
2752 /* Next, see if we can do a better job with a thumb2 replicated
2753 constant.
2754
2755 We do it this way around to catch the cases like 0x01F001E0 where
2756 two 8-bit immediates would work, but a replicated constant would
2757 make it worse.
2758
2759 TODO: 16-bit constants that don't clear all the bits, but still win.
2760 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
2761 if (TARGET_THUMB2)
2762 {
2763 b1 = (remainder & 0xff000000) >> 24;
2764 b2 = (remainder & 0x00ff0000) >> 16;
2765 b3 = (remainder & 0x0000ff00) >> 8;
2766 b4 = remainder & 0xff;
2767
2768 if (loc > 24)
2769 {
2770 /* The 8-bit immediate already found clears b1 (and maybe b2),
2771 but must leave b3 and b4 alone. */
2772
2773 /* First try to find a 32-bit replicated constant that clears
2774 almost everything. We can assume that we can't do it in one,
2775 or else we wouldn't be here. */
2776 unsigned int tmp = b1 & b2 & b3 & b4;
2777 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2778 + (tmp << 24);
2779 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2780 + (tmp == b3) + (tmp == b4);
2781 if (tmp
2782 && (matching_bytes >= 3
2783 || (matching_bytes == 2
2784 && const_ok_for_op (remainder & ~tmp2, code))))
2785 {
2786 /* At least 3 of the bytes match, and the fourth has at
2787 least as many bits set, or two of the bytes match
2788 and it will only require one more insn to finish. */
2789 result = tmp2;
2790 i = tmp != b1 ? 32
2791 : tmp != b2 ? 24
2792 : tmp != b3 ? 16
2793 : 8;
2794 }
2795
2796 /* Second, try to find a 16-bit replicated constant that can
2797 leave three of the bytes clear. If b2 or b4 is already
2798 zero, then we can. If the 8-bit from above would not
2799 clear b2 anyway, then we still win. */
2800 else if (b1 == b3 && (!b2 || !b4
2801 || (remainder & 0x00ff0000 & ~result)))
2802 {
2803 result = remainder & 0xff00ff00;
2804 i = 24;
2805 }
2806 }
2807 else if (loc > 16)
2808 {
2809 /* The 8-bit immediate already found clears b2 (and maybe b3)
2810 and we don't get here unless b1 is already clear, but it will
2811 leave b4 unchanged. */
2812
2813 /* If we can clear b2 and b4 at once, then we win, since the
2814 8-bits couldn't possibly reach that far. */
2815 if (b2 == b4)
2816 {
2817 result = remainder & 0x00ff00ff;
2818 i = 16;
2819 }
2820 }
2821 }
2822
2823 return_sequence->i[insns++] = result;
2824 remainder &= ~result;
2825
2826 if (code == SET || code == MINUS)
2827 code = PLUS;
2828 }
2829 while (remainder);
2830
2831 return insns;
2832 }
2833
2834 /* Emit an instruction with the indicated PATTERN. If COND is
2835 non-NULL, conditionalize the execution of the instruction on COND
2836 being true. */
2837
2838 static void
2839 emit_constant_insn (rtx cond, rtx pattern)
2840 {
2841 if (cond)
2842 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2843 emit_insn (pattern);
2844 }
2845
2846 /* As above, but extra parameter GENERATE which, if clear, suppresses
2847 RTL generation. */
2848
2849 static int
2850 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2851 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2852 int generate)
2853 {
2854 int can_invert = 0;
2855 int can_negate = 0;
2856 int final_invert = 0;
2857 int i;
2858 int set_sign_bit_copies = 0;
2859 int clear_sign_bit_copies = 0;
2860 int clear_zero_bit_copies = 0;
2861 int set_zero_bit_copies = 0;
2862 int insns = 0, neg_insns, inv_insns;
2863 unsigned HOST_WIDE_INT temp1, temp2;
2864 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2865 struct four_ints *immediates;
2866 struct four_ints pos_immediates, neg_immediates, inv_immediates;
2867
2868 /* Find out which operations are safe for a given CODE. Also do a quick
2869 check for degenerate cases; these can occur when DImode operations
2870 are split. */
2871 switch (code)
2872 {
2873 case SET:
2874 can_invert = 1;
2875 break;
2876
2877 case PLUS:
2878 can_negate = 1;
2879 break;
2880
2881 case IOR:
2882 if (remainder == 0xffffffff)
2883 {
2884 if (generate)
2885 emit_constant_insn (cond,
2886 gen_rtx_SET (VOIDmode, target,
2887 GEN_INT (ARM_SIGN_EXTEND (val))));
2888 return 1;
2889 }
2890
2891 if (remainder == 0)
2892 {
2893 if (reload_completed && rtx_equal_p (target, source))
2894 return 0;
2895
2896 if (generate)
2897 emit_constant_insn (cond,
2898 gen_rtx_SET (VOIDmode, target, source));
2899 return 1;
2900 }
2901 break;
2902
2903 case AND:
2904 if (remainder == 0)
2905 {
2906 if (generate)
2907 emit_constant_insn (cond,
2908 gen_rtx_SET (VOIDmode, target, const0_rtx));
2909 return 1;
2910 }
2911 if (remainder == 0xffffffff)
2912 {
2913 if (reload_completed && rtx_equal_p (target, source))
2914 return 0;
2915 if (generate)
2916 emit_constant_insn (cond,
2917 gen_rtx_SET (VOIDmode, target, source));
2918 return 1;
2919 }
2920 can_invert = 1;
2921 break;
2922
2923 case XOR:
2924 if (remainder == 0)
2925 {
2926 if (reload_completed && rtx_equal_p (target, source))
2927 return 0;
2928 if (generate)
2929 emit_constant_insn (cond,
2930 gen_rtx_SET (VOIDmode, target, source));
2931 return 1;
2932 }
2933
2934 if (remainder == 0xffffffff)
2935 {
2936 if (generate)
2937 emit_constant_insn (cond,
2938 gen_rtx_SET (VOIDmode, target,
2939 gen_rtx_NOT (mode, source)));
2940 return 1;
2941 }
2942 final_invert = 1;
2943 break;
2944
2945 case MINUS:
2946 /* We treat MINUS as (val - source), since (source - val) is always
2947 passed as (source + (-val)). */
2948 if (remainder == 0)
2949 {
2950 if (generate)
2951 emit_constant_insn (cond,
2952 gen_rtx_SET (VOIDmode, target,
2953 gen_rtx_NEG (mode, source)));
2954 return 1;
2955 }
2956 if (const_ok_for_arm (val))
2957 {
2958 if (generate)
2959 emit_constant_insn (cond,
2960 gen_rtx_SET (VOIDmode, target,
2961 gen_rtx_MINUS (mode, GEN_INT (val),
2962 source)));
2963 return 1;
2964 }
2965
2966 break;
2967
2968 default:
2969 gcc_unreachable ();
2970 }
2971
2972 /* If we can do it in one insn get out quickly. */
2973 if (const_ok_for_op (val, code))
2974 {
2975 if (generate)
2976 emit_constant_insn (cond,
2977 gen_rtx_SET (VOIDmode, target,
2978 (source
2979 ? gen_rtx_fmt_ee (code, mode, source,
2980 GEN_INT (val))
2981 : GEN_INT (val))));
2982 return 1;
2983 }
2984
2985 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
2986 insn. */
2987 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
2988 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
2989 {
2990 if (generate)
2991 {
2992 if (mode == SImode && i == 16)
2993 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
2994 smaller insn. */
2995 emit_constant_insn (cond,
2996 gen_zero_extendhisi2
2997 (target, gen_lowpart (HImode, source)));
2998 else
2999 /* Extz only supports SImode, but we can coerce the operands
3000 into that mode. */
3001 emit_constant_insn (cond,
3002 gen_extzv_t2 (gen_lowpart (SImode, target),
3003 gen_lowpart (SImode, source),
3004 GEN_INT (i), const0_rtx));
3005 }
3006
3007 return 1;
3008 }
3009
3010 /* Calculate a few attributes that may be useful for specific
3011 optimizations. */
3012 /* Count number of leading zeros. */
3013 for (i = 31; i >= 0; i--)
3014 {
3015 if ((remainder & (1 << i)) == 0)
3016 clear_sign_bit_copies++;
3017 else
3018 break;
3019 }
3020
3021 /* Count number of leading 1's. */
3022 for (i = 31; i >= 0; i--)
3023 {
3024 if ((remainder & (1 << i)) != 0)
3025 set_sign_bit_copies++;
3026 else
3027 break;
3028 }
3029
3030 /* Count number of trailing zeros. */
3031 for (i = 0; i <= 31; i++)
3032 {
3033 if ((remainder & (1 << i)) == 0)
3034 clear_zero_bit_copies++;
3035 else
3036 break;
3037 }
3038
3039 /* Count number of trailing 1's. */
3040 for (i = 0; i <= 31; i++)
3041 {
3042 if ((remainder & (1 << i)) != 0)
3043 set_zero_bit_copies++;
3044 else
3045 break;
3046 }
3047
3048 switch (code)
3049 {
3050 case SET:
3051 /* See if we can do this by sign_extending a constant that is known
3052 to be negative. This is a good way of doing it, since the shift
3053 may well merge into a subsequent insn. */
3054 if (set_sign_bit_copies > 1)
3055 {
3056 if (const_ok_for_arm
3057 (temp1 = ARM_SIGN_EXTEND (remainder
3058 << (set_sign_bit_copies - 1))))
3059 {
3060 if (generate)
3061 {
3062 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3063 emit_constant_insn (cond,
3064 gen_rtx_SET (VOIDmode, new_src,
3065 GEN_INT (temp1)));
3066 emit_constant_insn (cond,
3067 gen_ashrsi3 (target, new_src,
3068 GEN_INT (set_sign_bit_copies - 1)));
3069 }
3070 return 2;
3071 }
3072 /* For an inverted constant, we will need to set the low bits;
3073 these will be shifted out of harm's way. */
3074 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3075 if (const_ok_for_arm (~temp1))
3076 {
3077 if (generate)
3078 {
3079 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3080 emit_constant_insn (cond,
3081 gen_rtx_SET (VOIDmode, new_src,
3082 GEN_INT (temp1)));
3083 emit_constant_insn (cond,
3084 gen_ashrsi3 (target, new_src,
3085 GEN_INT (set_sign_bit_copies - 1)));
3086 }
3087 return 2;
3088 }
3089 }
3090
3091 /* See if we can calculate the value as the difference between two
3092 valid immediates. */
3093 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3094 {
3095 int topshift = clear_sign_bit_copies & ~1;
3096
3097 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3098 & (0xff000000 >> topshift));
3099
3100 /* If temp1 is zero, then that means the 9 most significant
3101 bits of remainder were 1 and we've caused it to overflow.
3102 When topshift is 0 we don't need to do anything since we
3103 can borrow from 'bit 32'. */
3104 if (temp1 == 0 && topshift != 0)
3105 temp1 = 0x80000000 >> (topshift - 1);
3106
3107 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3108
3109 if (const_ok_for_arm (temp2))
3110 {
3111 if (generate)
3112 {
3113 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3114 emit_constant_insn (cond,
3115 gen_rtx_SET (VOIDmode, new_src,
3116 GEN_INT (temp1)));
3117 emit_constant_insn (cond,
3118 gen_addsi3 (target, new_src,
3119 GEN_INT (-temp2)));
3120 }
3121
3122 return 2;
3123 }
3124 }
3125
3126 /* See if we can generate this by setting the bottom (or the top)
3127 16 bits, and then shifting these into the other half of the
3128 word. We only look for the simplest cases, to do more would cost
3129 too much. Be careful, however, not to generate this when the
3130 alternative would take fewer insns. */
3131 if (val & 0xffff0000)
3132 {
3133 temp1 = remainder & 0xffff0000;
3134 temp2 = remainder & 0x0000ffff;
3135
3136 /* Overlaps outside this range are best done using other methods. */
3137 for (i = 9; i < 24; i++)
3138 {
3139 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3140 && !const_ok_for_arm (temp2))
3141 {
3142 rtx new_src = (subtargets
3143 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3144 : target);
3145 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3146 source, subtargets, generate);
3147 source = new_src;
3148 if (generate)
3149 emit_constant_insn
3150 (cond,
3151 gen_rtx_SET
3152 (VOIDmode, target,
3153 gen_rtx_IOR (mode,
3154 gen_rtx_ASHIFT (mode, source,
3155 GEN_INT (i)),
3156 source)));
3157 return insns + 1;
3158 }
3159 }
3160
3161 /* Don't duplicate cases already considered. */
3162 for (i = 17; i < 24; i++)
3163 {
3164 if (((temp1 | (temp1 >> i)) == remainder)
3165 && !const_ok_for_arm (temp1))
3166 {
3167 rtx new_src = (subtargets
3168 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3169 : target);
3170 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3171 source, subtargets, generate);
3172 source = new_src;
3173 if (generate)
3174 emit_constant_insn
3175 (cond,
3176 gen_rtx_SET (VOIDmode, target,
3177 gen_rtx_IOR
3178 (mode,
3179 gen_rtx_LSHIFTRT (mode, source,
3180 GEN_INT (i)),
3181 source)));
3182 return insns + 1;
3183 }
3184 }
3185 }
3186 break;
3187
3188 case IOR:
3189 case XOR:
3190 /* If we have IOR or XOR, and the constant can be loaded in a
3191 single instruction, and we can find a temporary to put it in,
3192 then this can be done in two instructions instead of 3-4. */
3193 if (subtargets
3194 /* TARGET can't be NULL if SUBTARGETS is 0 */
3195 || (reload_completed && !reg_mentioned_p (target, source)))
3196 {
3197 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3198 {
3199 if (generate)
3200 {
3201 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3202
3203 emit_constant_insn (cond,
3204 gen_rtx_SET (VOIDmode, sub,
3205 GEN_INT (val)));
3206 emit_constant_insn (cond,
3207 gen_rtx_SET (VOIDmode, target,
3208 gen_rtx_fmt_ee (code, mode,
3209 source, sub)));
3210 }
3211 return 2;
3212 }
3213 }
3214
3215 if (code == XOR)
3216 break;
3217
3218 /* Convert
3219 x = y | constant (which is composed of set_sign_bit_copies leading 1s
3220 and the remaining bits 0, e.g. 0xfff00000) to
3221 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
3222
3223 This can be done in 2 instructions by using shifts with mov or mvn,
3224 e.g. for
3225 x = x | 0xfff00000;
3226 we generate:
3227 mvn r0, r0, asl #12
3228 mvn r0, r0, lsr #12 */
3229 if (set_sign_bit_copies > 8
3230 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3231 {
3232 if (generate)
3233 {
3234 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3235 rtx shift = GEN_INT (set_sign_bit_copies);
3236
3237 emit_constant_insn
3238 (cond,
3239 gen_rtx_SET (VOIDmode, sub,
3240 gen_rtx_NOT (mode,
3241 gen_rtx_ASHIFT (mode,
3242 source,
3243 shift))));
3244 emit_constant_insn
3245 (cond,
3246 gen_rtx_SET (VOIDmode, target,
3247 gen_rtx_NOT (mode,
3248 gen_rtx_LSHIFTRT (mode, sub,
3249 shift))));
3250 }
3251 return 2;
3252 }
3253
3254 /* Convert
3255 x = y | constant (which has set_zero_bit_copies trailing ones)
3256 to
3257 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3258
3259 E.g. for r0 = r0 | 0xfff we generate:
3260 mvn r0, r0, lsr #12
3261 mvn r0, r0, asl #12
3262
3263 */
3264 if (set_zero_bit_copies > 8
3265 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3266 {
3267 if (generate)
3268 {
3269 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3270 rtx shift = GEN_INT (set_zero_bit_copies);
3271
3272 emit_constant_insn
3273 (cond,
3274 gen_rtx_SET (VOIDmode, sub,
3275 gen_rtx_NOT (mode,
3276 gen_rtx_LSHIFTRT (mode,
3277 source,
3278 shift))));
3279 emit_constant_insn
3280 (cond,
3281 gen_rtx_SET (VOIDmode, target,
3282 gen_rtx_NOT (mode,
3283 gen_rtx_ASHIFT (mode, sub,
3284 shift))));
3285 }
3286 return 2;
3287 }
3288
3289 /* This will never be reached for Thumb2 because orn is a valid
3290 instruction. This is for Thumb1 and the ARM 32 bit cases.
3291
3292 x = y | constant (such that ~constant is a valid constant)
3293 Transform this to
3294 x = ~(~y & ~constant).
3295 */
3296 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3297 {
3298 if (generate)
3299 {
3300 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3301 emit_constant_insn (cond,
3302 gen_rtx_SET (VOIDmode, sub,
3303 gen_rtx_NOT (mode, source)));
3304 source = sub;
3305 if (subtargets)
3306 sub = gen_reg_rtx (mode);
3307 emit_constant_insn (cond,
3308 gen_rtx_SET (VOIDmode, sub,
3309 gen_rtx_AND (mode, source,
3310 GEN_INT (temp1))));
3311 emit_constant_insn (cond,
3312 gen_rtx_SET (VOIDmode, target,
3313 gen_rtx_NOT (mode, sub)));
3314 }
3315 return 3;
3316 }
3317 break;
3318
3319 case AND:
3320 /* See if two shifts will do 2 or more insn's worth of work. */
3321 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3322 {
3323 HOST_WIDE_INT shift_mask = ((0xffffffff
3324 << (32 - clear_sign_bit_copies))
3325 & 0xffffffff);
3326
3327 if ((remainder | shift_mask) != 0xffffffff)
3328 {
3329 if (generate)
3330 {
3331 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3332 insns = arm_gen_constant (AND, mode, cond,
3333 remainder | shift_mask,
3334 new_src, source, subtargets, 1);
3335 source = new_src;
3336 }
3337 else
3338 {
3339 rtx targ = subtargets ? NULL_RTX : target;
3340 insns = arm_gen_constant (AND, mode, cond,
3341 remainder | shift_mask,
3342 targ, source, subtargets, 0);
3343 }
3344 }
3345
3346 if (generate)
3347 {
3348 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3349 rtx shift = GEN_INT (clear_sign_bit_copies);
3350
3351 emit_insn (gen_ashlsi3 (new_src, source, shift));
3352 emit_insn (gen_lshrsi3 (target, new_src, shift));
3353 }
3354
3355 return insns + 2;
3356 }
3357
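/* Likewise for trailing clear bits: x = x & 0xffff0000 can be done as
       mov r0, r0, lsr #16
       mov r0, r0, asl #16  */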
3358 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3359 {
3360 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3361
3362 if ((remainder | shift_mask) != 0xffffffff)
3363 {
3364 if (generate)
3365 {
3366 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3367
3368 insns = arm_gen_constant (AND, mode, cond,
3369 remainder | shift_mask,
3370 new_src, source, subtargets, 1);
3371 source = new_src;
3372 }
3373 else
3374 {
3375 rtx targ = subtargets ? NULL_RTX : target;
3376
3377 insns = arm_gen_constant (AND, mode, cond,
3378 remainder | shift_mask,
3379 targ, source, subtargets, 0);
3380 }
3381 }
3382
3383 if (generate)
3384 {
3385 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3386 rtx shift = GEN_INT (clear_zero_bit_copies);
3387
3388 emit_insn (gen_lshrsi3 (new_src, source, shift));
3389 emit_insn (gen_ashlsi3 (target, new_src, shift));
3390 }
3391
3392 return insns + 2;
3393 }
3394
3395 break;
3396
3397 default:
3398 break;
3399 }
3400
3401 /* Calculate what the instruction sequences would be if we generated the
3402 constant normally, negated, or inverted. */
3403 if (code == AND)
3404 /* AND cannot be split into multiple insns, so invert and use BIC. */
3405 insns = 99;
3406 else
3407 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3408
3409 if (can_negate)
3410 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3411 &neg_immediates);
3412 else
3413 neg_insns = 99;
3414
3415 if (can_invert || final_invert)
3416 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3417 &inv_immediates);
3418 else
3419 inv_insns = 99;
3420
3421 immediates = &pos_immediates;
3422
3423 /* Is the negated immediate sequence more efficient? */
3424 if (neg_insns < insns && neg_insns <= inv_insns)
3425 {
3426 insns = neg_insns;
3427 immediates = &neg_immediates;
3428 }
3429 else
3430 can_negate = 0;
3431
3432 /* Is the inverted immediate sequence more efficient?
3433 We must allow for an extra NOT instruction for XOR operations, although
3434 there is some chance that the final 'mvn' will get optimized later. */
3435 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3436 {
3437 insns = inv_insns;
3438 immediates = &inv_immediates;
3439 }
3440 else
3441 {
3442 can_invert = 0;
3443 final_invert = 0;
3444 }
3445
3446 /* Now output the chosen sequence as instructions. */
3447 if (generate)
3448 {
3449 for (i = 0; i < insns; i++)
3450 {
3451 rtx new_src, temp1_rtx;
3452
3453 temp1 = immediates->i[i];
3454
3455 if (code == SET || code == MINUS)
3456 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3457 else if ((final_invert || i < (insns - 1)) && subtargets)
3458 new_src = gen_reg_rtx (mode);
3459 else
3460 new_src = target;
3461
3462 if (can_invert)
3463 temp1 = ~temp1;
3464 else if (can_negate)
3465 temp1 = -temp1;
3466
3467 temp1 = trunc_int_for_mode (temp1, mode);
3468 temp1_rtx = GEN_INT (temp1);
3469
3470 if (code == SET)
3471 ;
3472 else if (code == MINUS)
3473 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3474 else
3475 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3476
3477 emit_constant_insn (cond,
3478 gen_rtx_SET (VOIDmode, new_src,
3479 temp1_rtx));
3480 source = new_src;
3481
3482 if (code == SET)
3483 {
3484 can_negate = can_invert;
3485 can_invert = 0;
3486 code = PLUS;
3487 }
3488 else if (code == MINUS)
3489 code = PLUS;
3490 }
3491 }
3492
3493 if (final_invert)
3494 {
3495 if (generate)
3496 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3497 gen_rtx_NOT (mode, source)));
3498 insns++;
3499 }
3500
3501 return insns;
3502 }
3503
3504 /* Canonicalize a comparison so that we are more likely to recognize it.
3505 This can be done for a few constant compares, where we can make the
3506 immediate value easier to load. */
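/* For example, the signed comparison x > 4095 uses GT with #4095,
   which is not a valid rotated 8-bit immediate; rewriting it below as
   x >= 4096 (GE with #4096) lets a single cmp instruction do the
   job.  */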
3507
3508 enum rtx_code
3509 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3510 {
3511 enum machine_mode mode;
3512 unsigned HOST_WIDE_INT i, maxval;
3513
3514 mode = GET_MODE (*op0);
3515 if (mode == VOIDmode)
3516 mode = GET_MODE (*op1);
3517
3518 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3519
3520 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3521 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3522 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3523 for GTU/LEU in Thumb mode. */
3524 if (mode == DImode)
3525 {
3526 rtx tem;
3527
3528 if (code == GT || code == LE
3529 || (!TARGET_ARM && (code == GTU || code == LEU)))
3530 {
3531 /* Missing comparison. First try to use an available
3532 comparison. */
3533 if (CONST_INT_P (*op1))
3534 {
3535 i = INTVAL (*op1);
3536 switch (code)
3537 {
3538 case GT:
3539 case LE:
3540 if (i != maxval
3541 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3542 {
3543 *op1 = GEN_INT (i + 1);
3544 return code == GT ? GE : LT;
3545 }
3546 break;
3547 case GTU:
3548 case LEU:
3549 if (i != ~((unsigned HOST_WIDE_INT) 0)
3550 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3551 {
3552 *op1 = GEN_INT (i + 1);
3553 return code == GTU ? GEU : LTU;
3554 }
3555 break;
3556 default:
3557 gcc_unreachable ();
3558 }
3559 }
3560
3561 /* If that did not work, reverse the condition. */
3562 tem = *op0;
3563 *op0 = *op1;
3564 *op1 = tem;
3565 return swap_condition (code);
3566 }
3567
3568 return code;
3569 }
3570
3571 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3572 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3573 to facilitate possible combining with a cmp into 'ands'. */
3574 if (mode == SImode
3575 && GET_CODE (*op0) == ZERO_EXTEND
3576 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3577 && GET_MODE (XEXP (*op0, 0)) == QImode
3578 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3579 && subreg_lowpart_p (XEXP (*op0, 0))
3580 && *op1 == const0_rtx)
3581 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
3582 GEN_INT (255));
3583
3584 /* Comparisons smaller than DImode. Only adjust comparisons against
3585 an out-of-range constant. */
3586 if (!CONST_INT_P (*op1)
3587 || const_ok_for_arm (INTVAL (*op1))
3588 || const_ok_for_arm (- INTVAL (*op1)))
3589 return code;
3590
3591 i = INTVAL (*op1);
3592
3593 switch (code)
3594 {
3595 case EQ:
3596 case NE:
3597 return code;
3598
3599 case GT:
3600 case LE:
3601 if (i != maxval
3602 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3603 {
3604 *op1 = GEN_INT (i + 1);
3605 return code == GT ? GE : LT;
3606 }
3607 break;
3608
3609 case GE:
3610 case LT:
3611 if (i != ~maxval
3612 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3613 {
3614 *op1 = GEN_INT (i - 1);
3615 return code == GE ? GT : LE;
3616 }
3617 break;
3618
3619 case GTU:
3620 case LEU:
3621 if (i != ~((unsigned HOST_WIDE_INT) 0)
3622 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3623 {
3624 *op1 = GEN_INT (i + 1);
3625 return code == GTU ? GEU : LTU;
3626 }
3627 break;
3628
3629 case GEU:
3630 case LTU:
3631 if (i != 0
3632 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3633 {
3634 *op1 = GEN_INT (i - 1);
3635 return code == GEU ? GTU : LEU;
3636 }
3637 break;
3638
3639 default:
3640 gcc_unreachable ();
3641 }
3642
3643 return code;
3644 }
3645
3646
3647 /* Define how to find the value returned by a function. */
3648
3649 static rtx
3650 arm_function_value(const_tree type, const_tree func,
3651 bool outgoing ATTRIBUTE_UNUSED)
3652 {
3653 enum machine_mode mode;
3654 int unsignedp ATTRIBUTE_UNUSED;
3655 rtx r ATTRIBUTE_UNUSED;
3656
3657 mode = TYPE_MODE (type);
3658
3659 if (TARGET_AAPCS_BASED)
3660 return aapcs_allocate_return_reg (mode, type, func);
3661
3662 /* Promote integer types. */
3663 if (INTEGRAL_TYPE_P (type))
3664 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3665
3666 /* Promote small structs returned in a register to full-word size
3667 for big-endian AAPCS. */
3668 if (arm_return_in_msb (type))
3669 {
3670 HOST_WIDE_INT size = int_size_in_bytes (type);
3671 if (size % UNITS_PER_WORD != 0)
3672 {
3673 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3674 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3675 }
3676 }
3677
3678 return arm_libcall_value_1 (mode);
3679 }
3680
3681 static int
3682 libcall_eq (const void *p1, const void *p2)
3683 {
3684 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3685 }
3686
3687 static hashval_t
3688 libcall_hash (const void *p1)
3689 {
3690 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3691 }
3692
3693 static void
3694 add_libcall (htab_t htab, rtx libcall)
3695 {
3696 *htab_find_slot (htab, libcall, INSERT) = libcall;
3697 }
3698
3699 static bool
3700 arm_libcall_uses_aapcs_base (const_rtx libcall)
3701 {
3702 static bool init_done = false;
3703 static htab_t libcall_htab;
3704
3705 if (!init_done)
3706 {
3707 init_done = true;
3708
3709 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3710 NULL);
3711 add_libcall (libcall_htab,
3712 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3713 add_libcall (libcall_htab,
3714 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3715 add_libcall (libcall_htab,
3716 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3717 add_libcall (libcall_htab,
3718 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3719
3720 add_libcall (libcall_htab,
3721 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3722 add_libcall (libcall_htab,
3723 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3724 add_libcall (libcall_htab,
3725 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3726 add_libcall (libcall_htab,
3727 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3728
3729 add_libcall (libcall_htab,
3730 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3731 add_libcall (libcall_htab,
3732 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3733 add_libcall (libcall_htab,
3734 convert_optab_libfunc (sfix_optab, SImode, DFmode));
3735 add_libcall (libcall_htab,
3736 convert_optab_libfunc (ufix_optab, SImode, DFmode));
3737 add_libcall (libcall_htab,
3738 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3739 add_libcall (libcall_htab,
3740 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3741 add_libcall (libcall_htab,
3742 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3743 add_libcall (libcall_htab,
3744 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3745
3746 /* Values from double-precision helper functions are returned in core
3747 registers if the selected core only supports single-precision
3748 arithmetic, even if we are using the hard-float ABI. The same is
3749 true for single-precision helpers, but we will never be using the
3750 hard-float ABI on a CPU which doesn't support single-precision
3751 operations in hardware. */
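/* For instance, on a single-precision-only VFP such as the FPv4-SP
   unit of a Cortex-M4, __aeabi_dadd is a software double-precision
   add and returns its result in r0/r1 even under -mfloat-abi=hard,
   so it must use the AAPCS base variant.  */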
3752 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3753 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3754 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3755 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3756 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3757 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3758 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3759 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3760 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3761 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3762 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3763 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3764 SFmode));
3765 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3766 DFmode));
3767 }
3768
3769 return libcall && htab_find (libcall_htab, libcall) != NULL;
3770 }
3771
3772 static rtx
3773 arm_libcall_value_1 (enum machine_mode mode)
3774 {
3775 if (TARGET_AAPCS_BASED)
3776 return aapcs_libcall_value (mode);
3777 else if (TARGET_IWMMXT_ABI
3778 && arm_vector_mode_supported_p (mode))
3779 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
3780 else
3781 return gen_rtx_REG (mode, ARG_REGISTER (1));
3782 }
3783
3784 /* Define how to find the value returned by a library function
3785 assuming the value has mode MODE. */
3786
3787 static rtx
3788 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3789 {
3790 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3791 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3792 {
3793 /* The following libcalls return their result in integer registers,
3794 even though they return a floating point value. */
3795 if (arm_libcall_uses_aapcs_base (libcall))
3796 return gen_rtx_REG (mode, ARG_REGISTER(1));
3797
3798 }
3799
3800 return arm_libcall_value_1 (mode);
3801 }
3802
3803 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
3804
3805 static bool
3806 arm_function_value_regno_p (const unsigned int regno)
3807 {
3808 if (regno == ARG_REGISTER (1)
3809 || (TARGET_32BIT
3810 && TARGET_AAPCS_BASED
3811 && TARGET_VFP
3812 && TARGET_HARD_FLOAT
3813 && regno == FIRST_VFP_REGNUM)
3814 || (TARGET_IWMMXT_ABI
3815 && regno == FIRST_IWMMXT_REGNUM))
3816 return true;
3817
3818 return false;
3819 }
3820
3821 /* Determine the amount of memory needed to store the possible return
3822 registers of an untyped call. */
3823 int
3824 arm_apply_result_size (void)
3825 {
3826 int size = 16;
3827
3828 if (TARGET_32BIT)
3829 {
3830 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
3831 size += 32;
3832 if (TARGET_IWMMXT_ABI)
3833 size += 8;
3834 }
3835
3836 return size;
3837 }
3838
3839 /* Decide whether TYPE should be returned in memory (true)
3840 or in a register (false). FNTYPE is the type of the function making
3841 the call. */
3842 static bool
3843 arm_return_in_memory (const_tree type, const_tree fntype)
3844 {
3845 HOST_WIDE_INT size;
3846
3847 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3848
3849 if (TARGET_AAPCS_BASED)
3850 {
3851 /* Simple, non-aggregate types (i.e. not including vectors and
3852 complex) are always returned in a register (or registers).
3853 We don't care about which register here, so we can short-cut
3854 some of the detail. */
3855 if (!AGGREGATE_TYPE_P (type)
3856 && TREE_CODE (type) != VECTOR_TYPE
3857 && TREE_CODE (type) != COMPLEX_TYPE)
3858 return false;
3859
3860 /* Any return value that is no larger than one word can be
3861 returned in r0. */
3862 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3863 return false;
3864
3865 /* Check any available co-processors to see if they accept the
3866 type as a register candidate (VFP, for example, can return
3867 some aggregates in consecutive registers). These aren't
3868 available if the call is variadic. */
3869 if (aapcs_select_return_coproc (type, fntype) >= 0)
3870 return false;
3871
3872 /* Vector values should be returned using ARM registers, not
3873 memory (unless they're over 16 bytes, which will break since
3874 we only have four call-clobbered registers to play with). */
3875 if (TREE_CODE (type) == VECTOR_TYPE)
3876 return (size < 0 || size > (4 * UNITS_PER_WORD));
3877
3878 /* The rest go in memory. */
3879 return true;
3880 }
3881
3882 if (TREE_CODE (type) == VECTOR_TYPE)
3883 return (size < 0 || size > (4 * UNITS_PER_WORD));
3884
3885 if (!AGGREGATE_TYPE_P (type)
3886 && TREE_CODE (type) != VECTOR_TYPE)
3887 /* All simple types are returned in registers. */
3888 return false;
3889
3890 if (arm_abi != ARM_ABI_APCS)
3891 {
3892 /* ATPCS and later return aggregate types in memory only if they are
3893 larger than a word (or are variable size). */
3894 return (size < 0 || size > UNITS_PER_WORD);
3895 }
3896
3897 /* For the arm-wince targets we choose to be compatible with Microsoft's
3898 ARM and Thumb compilers, which always return aggregates in memory. */
3899 #ifndef ARM_WINCE
3900 /* All structures/unions bigger than one word are returned in memory.
3901 Also catch the case where int_size_in_bytes returns -1. In this case
3902 the aggregate is either huge or of variable size, and in either case
3903 we will want to return it via memory and not in a register. */
3904 if (size < 0 || size > UNITS_PER_WORD)
3905 return true;
3906
3907 if (TREE_CODE (type) == RECORD_TYPE)
3908 {
3909 tree field;
3910
3911 /* For a struct the APCS says that we only return in a register
3912 if the type is 'integer like' and every addressable element
3913 has an offset of zero. For practical purposes this means
3914 that the structure can have at most one non bit-field element
3915 and that this element must be the first one in the structure. */
3916
3917 /* Find the first field, ignoring non FIELD_DECL things which will
3918 have been created by C++. */
3919 for (field = TYPE_FIELDS (type);
3920 field && TREE_CODE (field) != FIELD_DECL;
3921 field = DECL_CHAIN (field))
3922 continue;
3923
3924 if (field == NULL)
3925 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3926
3927 /* Check that the first field is valid for returning in a register. */
3928
3929 /* ... Floats are not allowed. */
3930 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3931 return true;
3932
3933 /* ... Aggregates that are not themselves valid for returning in
3934 a register are not allowed. */
3935 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3936 return true;
3937
3938 /* Now check the remaining fields, if any. Only bitfields are allowed,
3939 since they are not addressable. */
3940 for (field = DECL_CHAIN (field);
3941 field;
3942 field = DECL_CHAIN (field))
3943 {
3944 if (TREE_CODE (field) != FIELD_DECL)
3945 continue;
3946
3947 if (!DECL_BIT_FIELD_TYPE (field))
3948 return true;
3949 }
3950
3951 return false;
3952 }
3953
3954 if (TREE_CODE (type) == UNION_TYPE)
3955 {
3956 tree field;
3957
3958 /* Unions can be returned in registers if every element is
3959 integral, or can be returned in an integer register. */
3960 for (field = TYPE_FIELDS (type);
3961 field;
3962 field = DECL_CHAIN (field))
3963 {
3964 if (TREE_CODE (field) != FIELD_DECL)
3965 continue;
3966
3967 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3968 return true;
3969
3970 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3971 return true;
3972 }
3973
3974 return false;
3975 }
3976 #endif /* not ARM_WINCE */
3977
3978 /* Return all other types in memory. */
3979 return true;
3980 }
3981
3982 const struct pcs_attribute_arg
3983 {
3984 const char *arg;
3985 enum arm_pcs value;
3986 } pcs_attribute_args[] =
3987 {
3988 {"aapcs", ARM_PCS_AAPCS},
3989 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3990 #if 0
3991 /* We could recognize these, but changes would be needed elsewhere
3992 * to implement them. */
3993 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3994 {"atpcs", ARM_PCS_ATPCS},
3995 {"apcs", ARM_PCS_APCS},
3996 #endif
3997 {NULL, ARM_PCS_UNKNOWN}
3998 };
3999
4000 static enum arm_pcs
4001 arm_pcs_from_attribute (tree attr)
4002 {
4003 const struct pcs_attribute_arg *ptr;
4004 const char *arg;
4005
4006 /* Get the value of the argument. */
4007 if (TREE_VALUE (attr) == NULL_TREE
4008 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4009 return ARM_PCS_UNKNOWN;
4010
4011 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4012
4013 /* Check it against the list of known arguments. */
4014 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4015 if (streq (arg, ptr->arg))
4016 return ptr->value;
4017
4018 /* An unrecognized PCS variant. */
4019 return ARM_PCS_UNKNOWN;
4020 }
4021
4022 /* Get the PCS variant to use for this call. TYPE is the function's type
4023 specification, DECL is the specific declaration. DECL may be null if
4024 the call could be indirect or if this is a library call. */
4025 static enum arm_pcs
4026 arm_get_pcs_model (const_tree type, const_tree decl)
4027 {
4028 bool user_convention = false;
4029 enum arm_pcs user_pcs = arm_pcs_default;
4030 tree attr;
4031
4032 gcc_assert (type);
4033
4034 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4035 if (attr)
4036 {
4037 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4038 user_convention = true;
4039 }
4040
4041 if (TARGET_AAPCS_BASED)
4042 {
4043 /* Detect varargs functions. These always use the base rules
4044 (no argument is ever a candidate for a co-processor
4045 register). */
4046 bool base_rules = stdarg_p (type);
4047
4048 if (user_convention)
4049 {
4050 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4051 sorry ("non-AAPCS derived PCS variant");
4052 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4053 error ("variadic functions must use the base AAPCS variant");
4054 }
4055
4056 if (base_rules)
4057 return ARM_PCS_AAPCS;
4058 else if (user_convention)
4059 return user_pcs;
4060 else if (decl && flag_unit_at_a_time)
4061 {
4062 /* Local functions never leak outside this compilation unit,
4063 so we are free to use whatever conventions are
4064 appropriate. */
4065 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4066 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4067 if (i && i->local)
4068 return ARM_PCS_AAPCS_LOCAL;
4069 }
4070 }
4071 else if (user_convention && user_pcs != arm_pcs_default)
4072 sorry ("PCS variant");
4073
4074 /* For everything else we use the target's default. */
4075 return arm_pcs_default;
4076 }
4077
4078
4079 static void
4080 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4081 const_tree fntype ATTRIBUTE_UNUSED,
4082 rtx libcall ATTRIBUTE_UNUSED,
4083 const_tree fndecl ATTRIBUTE_UNUSED)
4084 {
4085 /* Record the unallocated VFP registers. */
4086 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4087 pcum->aapcs_vfp_reg_alloc = 0;
4088 }
4089
4090 /* Walk down the type tree of TYPE counting consecutive base elements.
4091 If *MODEP is VOIDmode, then set it to the first valid floating point
4092 type. If a non-floating point type is found, or if a floating point
4093 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4094 otherwise return the count in the sub-tree. */
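/* For example, struct { double re; double im; } gives a count of 2
   with *MODEP set to DFmode (a homogeneous aggregate candidate),
   whereas struct { float a; double b; } mixes base types and gives
   -1.  */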
4095 static int
4096 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4097 {
4098 enum machine_mode mode;
4099 HOST_WIDE_INT size;
4100
4101 switch (TREE_CODE (type))
4102 {
4103 case REAL_TYPE:
4104 mode = TYPE_MODE (type);
4105 if (mode != DFmode && mode != SFmode)
4106 return -1;
4107
4108 if (*modep == VOIDmode)
4109 *modep = mode;
4110
4111 if (*modep == mode)
4112 return 1;
4113
4114 break;
4115
4116 case COMPLEX_TYPE:
4117 mode = TYPE_MODE (TREE_TYPE (type));
4118 if (mode != DFmode && mode != SFmode)
4119 return -1;
4120
4121 if (*modep == VOIDmode)
4122 *modep = mode;
4123
4124 if (*modep == mode)
4125 return 2;
4126
4127 break;
4128
4129 case VECTOR_TYPE:
4130 /* Use V2SImode and V4SImode as representatives of all 64-bit
4131 and 128-bit vector types, whether or not those modes are
4132 supported with the present options. */
4133 size = int_size_in_bytes (type);
4134 switch (size)
4135 {
4136 case 8:
4137 mode = V2SImode;
4138 break;
4139 case 16:
4140 mode = V4SImode;
4141 break;
4142 default:
4143 return -1;
4144 }
4145
4146 if (*modep == VOIDmode)
4147 *modep = mode;
4148
4149 /* Vector modes are considered to be opaque: two vectors are
4150 equivalent for the purposes of being homogeneous aggregates
4151 if they are the same size. */
4152 if (*modep == mode)
4153 return 1;
4154
4155 break;
4156
4157 case ARRAY_TYPE:
4158 {
4159 int count;
4160 tree index = TYPE_DOMAIN (type);
4161
4162 /* Can't handle incomplete types. */
4163 if (!COMPLETE_TYPE_P (type))
4164 return -1;
4165
4166 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4167 if (count == -1
4168 || !index
4169 || !TYPE_MAX_VALUE (index)
4170 || !host_integerp (TYPE_MAX_VALUE (index), 1)
4171 || !TYPE_MIN_VALUE (index)
4172 || !host_integerp (TYPE_MIN_VALUE (index), 1)
4173 || count < 0)
4174 return -1;
4175
4176 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4177 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4178
4179 /* There must be no padding. */
4180 if (!host_integerp (TYPE_SIZE (type), 1)
4181 || (tree_low_cst (TYPE_SIZE (type), 1)
4182 != count * GET_MODE_BITSIZE (*modep)))
4183 return -1;
4184
4185 return count;
4186 }
4187
4188 case RECORD_TYPE:
4189 {
4190 int count = 0;
4191 int sub_count;
4192 tree field;
4193
4194 /* Can't handle incomplete types. */
4195 if (!COMPLETE_TYPE_P (type))
4196 return -1;
4197
4198 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4199 {
4200 if (TREE_CODE (field) != FIELD_DECL)
4201 continue;
4202
4203 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4204 if (sub_count < 0)
4205 return -1;
4206 count += sub_count;
4207 }
4208
4209 /* There must be no padding. */
4210 if (!host_integerp (TYPE_SIZE (type), 1)
4211 || (tree_low_cst (TYPE_SIZE (type), 1)
4212 != count * GET_MODE_BITSIZE (*modep)))
4213 return -1;
4214
4215 return count;
4216 }
4217
4218 case UNION_TYPE:
4219 case QUAL_UNION_TYPE:
4220 {
4221 /* These aren't very interesting except in a degenerate case. */
4222 int count = 0;
4223 int sub_count;
4224 tree field;
4225
4226 /* Can't handle incomplete types. */
4227 if (!COMPLETE_TYPE_P (type))
4228 return -1;
4229
4230 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4231 {
4232 if (TREE_CODE (field) != FIELD_DECL)
4233 continue;
4234
4235 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4236 if (sub_count < 0)
4237 return -1;
4238 count = count > sub_count ? count : sub_count;
4239 }
4240
4241 /* There must be no padding. */
4242 if (!host_integerp (TYPE_SIZE (type), 1)
4243 || (tree_low_cst (TYPE_SIZE (type), 1)
4244 != count * GET_MODE_BITSIZE (*modep)))
4245 return -1;
4246
4247 return count;
4248 }
4249
4250 default:
4251 break;
4252 }
4253
4254 return -1;
4255 }
4256
4257 /* Return true if PCS_VARIANT should use VFP registers. */
4258 static bool
4259 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4260 {
4261 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4262 {
4263 static bool seen_thumb1_vfp = false;
4264
4265 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4266 {
4267 sorry ("Thumb-1 hard-float VFP ABI");
4268 /* sorry() is not immediately fatal, so only display this once. */
4269 seen_thumb1_vfp = true;
4270 }
4271
4272 return true;
4273 }
4274
4275 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4276 return false;
4277
4278 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
4279 && (TARGET_VFP_DOUBLE || !is_double));
4280 }
4281
4282 /* Return true if an argument whose type is TYPE, or mode is MODE, is
4283 suitable for passing or returning in VFP registers for the PCS
4284 variant selected. If it is, then *BASE_MODE is updated to contain
4285 a machine mode describing each element of the argument's type and
4286 *COUNT to hold the number of such elements. */
4287 static bool
4288 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4289 enum machine_mode mode, const_tree type,
4290 enum machine_mode *base_mode, int *count)
4291 {
4292 enum machine_mode new_mode = VOIDmode;
4293
4294 /* If we have the type information, prefer that to working things
4295 out from the mode. */
4296 if (type)
4297 {
4298 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4299
4300 if (ag_count > 0 && ag_count <= 4)
4301 *count = ag_count;
4302 else
4303 return false;
4304 }
4305 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
4306 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4307 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4308 {
4309 *count = 1;
4310 new_mode = mode;
4311 }
4312 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4313 {
4314 *count = 2;
4315 new_mode = (mode == DCmode ? DFmode : SFmode);
4316 }
4317 else
4318 return false;
4319
4320
4321 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4322 return false;
4323
4324 *base_mode = new_mode;
4325 return true;
4326 }
4327
4328 static bool
4329 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4330 enum machine_mode mode, const_tree type)
4331 {
4332 int count ATTRIBUTE_UNUSED;
4333 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4334
4335 if (!use_vfp_abi (pcs_variant, false))
4336 return false;
4337 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4338 &ag_mode, &count);
4339 }
4340
4341 static bool
4342 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4343 const_tree type)
4344 {
4345 if (!use_vfp_abi (pcum->pcs_variant, false))
4346 return false;
4347
4348 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4349 &pcum->aapcs_vfp_rmode,
4350 &pcum->aapcs_vfp_rcount);
4351 }
4352
4353 static bool
4354 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4355 const_tree type ATTRIBUTE_UNUSED)
4356 {
4357 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4358 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4359 int regno;
4360
4361 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4362 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4363 {
4364 pcum->aapcs_vfp_reg_alloc = mask << regno;
4365 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4366 {
4367 int i;
4368 int rcount = pcum->aapcs_vfp_rcount;
4369 int rshift = shift;
4370 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4371 rtx par;
4372 if (!TARGET_NEON)
4373 {
4374 /* Avoid using unsupported vector modes. */
4375 if (rmode == V2SImode)
4376 rmode = DImode;
4377 else if (rmode == V4SImode)
4378 {
4379 rmode = DImode;
4380 rcount *= 2;
4381 rshift /= 2;
4382 }
4383 }
4384 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4385 for (i = 0; i < rcount; i++)
4386 {
4387 rtx tmp = gen_rtx_REG (rmode,
4388 FIRST_VFP_REGNUM + regno + i * rshift);
4389 tmp = gen_rtx_EXPR_LIST
4390 (VOIDmode, tmp,
4391 GEN_INT (i * GET_MODE_SIZE (rmode)));
4392 XVECEXP (par, 0, i) = tmp;
4393 }
4394
4395 pcum->aapcs_reg = par;
4396 }
4397 else
4398 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4399 return true;
4400 }
4401 return false;
4402 }
4403
4404 static rtx
4405 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4406 enum machine_mode mode,
4407 const_tree type ATTRIBUTE_UNUSED)
4408 {
4409 if (!use_vfp_abi (pcs_variant, false))
4410 return NULL;
4411
4412 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4413 {
4414 int count;
4415 enum machine_mode ag_mode;
4416 int i;
4417 rtx par;
4418 int shift;
4419
4420 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4421 &ag_mode, &count);
4422
4423 if (!TARGET_NEON)
4424 {
4425 if (ag_mode == V2SImode)
4426 ag_mode = DImode;
4427 else if (ag_mode == V4SImode)
4428 {
4429 ag_mode = DImode;
4430 count *= 2;
4431 }
4432 }
4433 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4434 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4435 for (i = 0; i < count; i++)
4436 {
4437 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4438 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4439 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4440 XVECEXP (par, 0, i) = tmp;
4441 }
4442
4443 return par;
4444 }
4445
4446 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4447 }
4448
4449 static void
4450 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4451 enum machine_mode mode ATTRIBUTE_UNUSED,
4452 const_tree type ATTRIBUTE_UNUSED)
4453 {
4454 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4455 pcum->aapcs_vfp_reg_alloc = 0;
4456 return;
4457 }
4458
4459 #define AAPCS_CP(X) \
4460 { \
4461 aapcs_ ## X ## _cum_init, \
4462 aapcs_ ## X ## _is_call_candidate, \
4463 aapcs_ ## X ## _allocate, \
4464 aapcs_ ## X ## _is_return_candidate, \
4465 aapcs_ ## X ## _allocate_return_reg, \
4466 aapcs_ ## X ## _advance \
4467 }
4468
4469 /* Table of co-processors that can be used to pass arguments in
4470 registers. Ideally no argument should be a candidate for more than
4471 one co-processor table entry, but the table is processed in order
4472 and stops after the first match. If that entry then fails to put
4473 the argument into a co-processor register, the argument will go on
4474 the stack. */
4475 static struct
4476 {
4477 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4478 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4479
4480 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4481 BLKmode) is a candidate for this co-processor's registers; this
4482 function should ignore any position-dependent state in
4483 CUMULATIVE_ARGS and only use call-type dependent information. */
4484 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4485
4486 /* Return true if the argument does get a co-processor register; it
4487 should set aapcs_reg to an RTX of the register allocated as is
4488 required for a return from FUNCTION_ARG. */
4489 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4490
4491 /* Return true if a result of mode MODE (or type TYPE if MODE is
4492 BLKmode) can be returned in this co-processor's registers. */
4493 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4494
4495 /* Allocate and return an RTX element to hold the return type of a
4496 call, this routine must not fail and will only be called if
4497 is_return_candidate returned true with the same parameters. */
4498 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4499
4500 /* Finish processing this argument and prepare to start processing
4501 the next one. */
4502 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4503 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4504 {
4505 AAPCS_CP(vfp)
4506 };
4507
4508 #undef AAPCS_CP
4509
4510 static int
4511 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4512 const_tree type)
4513 {
4514 int i;
4515
4516 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4517 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4518 return i;
4519
4520 return -1;
4521 }
4522
4523 static int
4524 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4525 {
4526 /* We aren't passed a decl, so we can't check that a call is local.
4527 However, it isn't clear that that would be a win anyway, since it
4528 might limit some tail-calling opportunities. */
4529 enum arm_pcs pcs_variant;
4530
4531 if (fntype)
4532 {
4533 const_tree fndecl = NULL_TREE;
4534
4535 if (TREE_CODE (fntype) == FUNCTION_DECL)
4536 {
4537 fndecl = fntype;
4538 fntype = TREE_TYPE (fntype);
4539 }
4540
4541 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4542 }
4543 else
4544 pcs_variant = arm_pcs_default;
4545
4546 if (pcs_variant != ARM_PCS_AAPCS)
4547 {
4548 int i;
4549
4550 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4551 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4552 TYPE_MODE (type),
4553 type))
4554 return i;
4555 }
4556 return -1;
4557 }
4558
4559 static rtx
4560 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4561 const_tree fntype)
4562 {
4563 /* We aren't passed a decl, so we can't check that a call is local.
4564 However, it isn't clear that that would be a win anyway, since it
4565 might limit some tail-calling opportunities. */
4566 enum arm_pcs pcs_variant;
4567 int unsignedp ATTRIBUTE_UNUSED;
4568
4569 if (fntype)
4570 {
4571 const_tree fndecl = NULL_TREE;
4572
4573 if (TREE_CODE (fntype) == FUNCTION_DECL)
4574 {
4575 fndecl = fntype;
4576 fntype = TREE_TYPE (fntype);
4577 }
4578
4579 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4580 }
4581 else
4582 pcs_variant = arm_pcs_default;
4583
4584 /* Promote integer types. */
4585 if (type && INTEGRAL_TYPE_P (type))
4586 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4587
4588 if (pcs_variant != ARM_PCS_AAPCS)
4589 {
4590 int i;
4591
4592 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4593 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4594 type))
4595 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4596 mode, type);
4597 }
4598
4599 /* Promote small structs returned in a register to full-word size
4600 for big-endian AAPCS. */
4601 if (type && arm_return_in_msb (type))
4602 {
4603 HOST_WIDE_INT size = int_size_in_bytes (type);
4604 if (size % UNITS_PER_WORD != 0)
4605 {
4606 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4607 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4608 }
4609 }
4610
4611 return gen_rtx_REG (mode, R0_REGNUM);
4612 }
4613
4614 static rtx
4615 aapcs_libcall_value (enum machine_mode mode)
4616 {
4617 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4618 && GET_MODE_SIZE (mode) <= 4)
4619 mode = SImode;
4620
4621 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4622 }
4623
4624 /* Lay out a function argument using the AAPCS rules. The rule
4625 numbers referred to here are those in the AAPCS. */
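/* For example, with the base (soft-float) variant, f (int a, double d)
   puts A in r0, rule C3 rounds the NCRN up from 1 to 2 because D
   needs doubleword alignment, and rule C4 then places D in the
   register pair r2/r3.  */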
4626 static void
4627 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4628 const_tree type, bool named)
4629 {
4630 int nregs, nregs2;
4631 int ncrn;
4632
4633 /* We only need to do this once per argument. */
4634 if (pcum->aapcs_arg_processed)
4635 return;
4636
4637 pcum->aapcs_arg_processed = true;
4638
4639 /* Special case: if named is false then we are handling an incoming
4640 anonymous argument which is on the stack. */
4641 if (!named)
4642 return;
4643
4644 /* Is this a potential co-processor register candidate? */
4645 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4646 {
4647 int slot = aapcs_select_call_coproc (pcum, mode, type);
4648 pcum->aapcs_cprc_slot = slot;
4649
4650 /* We don't have to apply any of the rules from part B of the
4651 preparation phase, these are handled elsewhere in the
4652 compiler. */
4653
4654 if (slot >= 0)
4655 {
4656 /* A Co-processor register candidate goes either in its own
4657 class of registers or on the stack. */
4658 if (!pcum->aapcs_cprc_failed[slot])
4659 {
4660 /* C1.cp - Try to allocate the argument to co-processor
4661 registers. */
4662 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4663 return;
4664
4665 /* C2.cp - Put the argument on the stack and note that we
4666 can't assign any more candidates in this slot. We also
4667 need to note that we have allocated stack space, so that
4668 we won't later try to split a non-cprc candidate between
4669 core registers and the stack. */
4670 pcum->aapcs_cprc_failed[slot] = true;
4671 pcum->can_split = false;
4672 }
4673
4674 /* We didn't get a register, so this argument goes on the
4675 stack. */
4676 gcc_assert (pcum->can_split == false);
4677 return;
4678 }
4679 }
4680
4681 /* C3 - For double-word aligned arguments, round the NCRN up to the
4682 next even number. */
4683 ncrn = pcum->aapcs_ncrn;
4684 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4685 ncrn++;
4686
4687 nregs = ARM_NUM_REGS2(mode, type);
4688
4689 /* Sigh, this test should really assert that nregs > 0, but a GCC
4690 extension allows empty structs and then gives them zero size; it
4691 then allows such a structure to be passed by value. For some of
4692 the code below we have to pretend that such an argument has
4693 non-zero size so that we 'locate' it correctly either in
4694 registers or on the stack. */
4695 gcc_assert (nregs >= 0);
4696
4697 nregs2 = nregs ? nregs : 1;
4698
4699 /* C4 - Argument fits entirely in core registers. */
4700 if (ncrn + nregs2 <= NUM_ARG_REGS)
4701 {
4702 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4703 pcum->aapcs_next_ncrn = ncrn + nregs;
4704 return;
4705 }
4706
4707 /* C5 - Some core registers left and there are no arguments already
4708 on the stack: split this argument between the remaining core
4709 registers and the stack. */
4710 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4711 {
4712 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4713 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4714 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4715 return;
4716 }
4717
4718 /* C6 - NCRN is set to 4. */
4719 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4720
4721 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
4722 return;
4723 }
4724
4725 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4726 for a call to a function whose data type is FNTYPE.
4727 For a library call, FNTYPE is NULL. */
4728 void
4729 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4730 rtx libname,
4731 tree fndecl ATTRIBUTE_UNUSED)
4732 {
4733 /* Long call handling. */
4734 if (fntype)
4735 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4736 else
4737 pcum->pcs_variant = arm_pcs_default;
4738
4739 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4740 {
4741 if (arm_libcall_uses_aapcs_base (libname))
4742 pcum->pcs_variant = ARM_PCS_AAPCS;
4743
4744 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4745 pcum->aapcs_reg = NULL_RTX;
4746 pcum->aapcs_partial = 0;
4747 pcum->aapcs_arg_processed = false;
4748 pcum->aapcs_cprc_slot = -1;
4749 pcum->can_split = true;
4750
4751 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4752 {
4753 int i;
4754
4755 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4756 {
4757 pcum->aapcs_cprc_failed[i] = false;
4758 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4759 }
4760 }
4761 return;
4762 }
4763
4764 /* Legacy ABIs */
4765
4766 /* On the ARM, the offset starts at 0. */
4767 pcum->nregs = 0;
4768 pcum->iwmmxt_nregs = 0;
4769 pcum->can_split = true;
4770
4771 /* Varargs vectors are treated the same as long long.
4772 named_count avoids having to change the way arm handles 'named'. */
4773 pcum->named_count = 0;
4774 pcum->nargs = 0;
4775
4776 if (TARGET_REALLY_IWMMXT && fntype)
4777 {
4778 tree fn_arg;
4779
4780 for (fn_arg = TYPE_ARG_TYPES (fntype);
4781 fn_arg;
4782 fn_arg = TREE_CHAIN (fn_arg))
4783 pcum->named_count += 1;
4784
4785 if (! pcum->named_count)
4786 pcum->named_count = INT_MAX;
4787 }
4788 }
4789
4790
4791 /* Return true if mode/type need doubleword alignment. */
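/* PARM_BOUNDARY is 32 on ARM, so under AAPCS this is true for 64-bit
   scalars such as DImode and DFmode, and for any type whose declared
   alignment exceeds 32 bits.  */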
4792 static bool
4793 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4794 {
4795 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4796 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4797 }
4798
4799
4800 /* Determine where to put an argument to a function.
4801 Value is zero to push the argument on the stack,
4802 or a hard register in which to store the argument.
4803
4804 MODE is the argument's machine mode.
4805 TYPE is the data type of the argument (as a tree).
4806 This is null for libcalls where that information may
4807 not be available.
4808 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4809 the preceding args and about the function being called.
4810 NAMED is nonzero if this argument is a named parameter
4811 (otherwise it is an extra parameter matching an ellipsis).
4812
4813 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4814 other arguments are passed on the stack. If (NAMED == 0) (which happens
4815 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4816 defined), say it is passed on the stack (function_prologue will
4817 indeed make it be passed on the stack if necessary). */
4818
4819 static rtx
4820 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4821 const_tree type, bool named)
4822 {
4823 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4824 int nregs;
4825
4826 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4827 a call insn (op3 of a call_value insn). */
4828 if (mode == VOIDmode)
4829 return const0_rtx;
4830
4831 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4832 {
4833 aapcs_layout_arg (pcum, mode, type, named);
4834 return pcum->aapcs_reg;
4835 }
4836
4837 /* Varargs vectors are treated the same as long long.
4838 named_count avoids having to change the way arm handles 'named'. */
4839 if (TARGET_IWMMXT_ABI
4840 && arm_vector_mode_supported_p (mode)
4841 && pcum->named_count > pcum->nargs + 1)
4842 {
4843 if (pcum->iwmmxt_nregs <= 9)
4844 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4845 else
4846 {
4847 pcum->can_split = false;
4848 return NULL_RTX;
4849 }
4850 }
4851
4852 /* Put doubleword aligned quantities in even register pairs. */
4853 if (pcum->nregs & 1
4854 && ARM_DOUBLEWORD_ALIGN
4855 && arm_needs_doubleword_align (mode, type))
4856 pcum->nregs++;
4857
4858 /* Only allow splitting an arg between regs and memory if all preceding
4859 args were allocated to regs. For args passed by reference we only count
4860 the reference pointer. */
4861 if (pcum->can_split)
4862 nregs = 1;
4863 else
4864 nregs = ARM_NUM_REGS2 (mode, type);
4865
4866 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4867 return NULL_RTX;
4868
4869 return gen_rtx_REG (mode, pcum->nregs);
4870 }
4871
4872 static unsigned int
4873 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4874 {
4875 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4876 ? DOUBLEWORD_ALIGNMENT
4877 : PARM_BOUNDARY);
4878 }
4879
4880 static int
4881 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
4882 tree type, bool named)
4883 {
4884 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4885 int nregs = pcum->nregs;
4886
4887 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4888 {
4889 aapcs_layout_arg (pcum, mode, type, named);
4890 return pcum->aapcs_partial;
4891 }
4892
4893 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4894 return 0;
4895
4896 if (NUM_ARG_REGS > nregs
4897 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4898 && pcum->can_split)
4899 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4900
4901 return 0;
4902 }
4903
4904 /* Update the data in PCUM to advance over an argument
4905 of mode MODE and data type TYPE.
4906 (TYPE is null for libcalls where that information may not be available.) */
4907
4908 static void
4909 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
4910 const_tree type, bool named)
4911 {
4912 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4913
4914 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4915 {
4916 aapcs_layout_arg (pcum, mode, type, named);
4917
4918 if (pcum->aapcs_cprc_slot >= 0)
4919 {
4920 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4921 type);
4922 pcum->aapcs_cprc_slot = -1;
4923 }
4924
4925 /* Generic stuff. */
4926 pcum->aapcs_arg_processed = false;
4927 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4928 pcum->aapcs_reg = NULL_RTX;
4929 pcum->aapcs_partial = 0;
4930 }
4931 else
4932 {
4933 pcum->nargs += 1;
4934 if (arm_vector_mode_supported_p (mode)
4935 && pcum->named_count > pcum->nargs
4936 && TARGET_IWMMXT_ABI)
4937 pcum->iwmmxt_nregs += 1;
4938 else
4939 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4940 }
4941 }
4942
4943 /* Variable sized types are passed by reference. This is a GCC
4944 extension to the ARM ABI. */
4945
4946 static bool
4947 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
4948 enum machine_mode mode ATTRIBUTE_UNUSED,
4949 const_tree type, bool named ATTRIBUTE_UNUSED)
4950 {
4951 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4952 }
4953 \f
4954 /* Encode the current state of the #pragma [no_]long_calls. */
4955 typedef enum
4956 {
4957 OFF, /* No #pragma [no_]long_calls is in effect. */
4958 LONG, /* #pragma long_calls is in effect. */
4959 SHORT /* #pragma no_long_calls is in effect. */
4960 } arm_pragma_enum;
4961
4962 static arm_pragma_enum arm_pragma_long_calls = OFF;
4963
4964 void
4965 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4966 {
4967 arm_pragma_long_calls = LONG;
4968 }
4969
4970 void
4971 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4972 {
4973 arm_pragma_long_calls = SHORT;
4974 }
4975
4976 void
4977 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4978 {
4979 arm_pragma_long_calls = OFF;
4980 }
4981 \f
4982 /* Handle an attribute requiring a FUNCTION_DECL;
4983 arguments as in struct attribute_spec.handler. */
4984 static tree
4985 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4986 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4987 {
4988 if (TREE_CODE (*node) != FUNCTION_DECL)
4989 {
4990 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4991 name);
4992 *no_add_attrs = true;
4993 }
4994
4995 return NULL_TREE;
4996 }
4997
4998 /* Handle an "interrupt" or "isr" attribute;
4999 arguments as in struct attribute_spec.handler. */
5000 static tree
5001 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5002 bool *no_add_attrs)
5003 {
5004 if (DECL_P (*node))
5005 {
5006 if (TREE_CODE (*node) != FUNCTION_DECL)
5007 {
5008 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5009 name);
5010 *no_add_attrs = true;
5011 }
5012 /* FIXME: the argument if any is checked for type attributes;
5013 should it be checked for decl ones? */
5014 }
5015 else
5016 {
5017 if (TREE_CODE (*node) == FUNCTION_TYPE
5018 || TREE_CODE (*node) == METHOD_TYPE)
5019 {
5020 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5021 {
5022 warning (OPT_Wattributes, "%qE attribute ignored",
5023 name);
5024 *no_add_attrs = true;
5025 }
5026 }
5027 else if (TREE_CODE (*node) == POINTER_TYPE
5028 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5029 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5030 && arm_isr_value (args) != ARM_FT_UNKNOWN)
5031 {
5032 *node = build_variant_type_copy (*node);
5033 TREE_TYPE (*node) = build_type_attribute_variant
5034 (TREE_TYPE (*node),
5035 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5036 *no_add_attrs = true;
5037 }
5038 else
5039 {
5040 /* Possibly pass this attribute on from the type to a decl. */
5041 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5042 | (int) ATTR_FLAG_FUNCTION_NEXT
5043 | (int) ATTR_FLAG_ARRAY_NEXT))
5044 {
5045 *no_add_attrs = true;
5046 return tree_cons (name, args, NULL_TREE);
5047 }
5048 else
5049 {
5050 warning (OPT_Wattributes, "%qE attribute ignored",
5051 name);
5052 }
5053 }
5054 }
5055
5056 return NULL_TREE;
5057 }
5058
5059 /* Handle a "pcs" attribute; arguments as in struct
5060 attribute_spec.handler. */
5061 static tree
5062 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5063 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5064 {
5065 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5066 {
5067 warning (OPT_Wattributes, "%qE attribute ignored", name);
5068 *no_add_attrs = true;
5069 }
5070 return NULL_TREE;
5071 }
5072
5073 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5074 /* Handle the "notshared" attribute. This attribute is another way of
5075 requesting hidden visibility. ARM's compiler supports
5076 "__declspec(notshared)"; we support the same thing via an
5077 attribute. */
5078
5079 static tree
5080 arm_handle_notshared_attribute (tree *node,
5081 tree name ATTRIBUTE_UNUSED,
5082 tree args ATTRIBUTE_UNUSED,
5083 int flags ATTRIBUTE_UNUSED,
5084 bool *no_add_attrs)
5085 {
5086 tree decl = TYPE_NAME (*node);
5087
5088 if (decl)
5089 {
5090 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5091 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5092 *no_add_attrs = false;
5093 }
5094 return NULL_TREE;
5095 }
5096 #endif
5097
5098 /* Return 0 if the attributes for two types are incompatible, 1 if they
5099 are compatible, and 2 if they are nearly compatible (which causes a
5100 warning to be generated). */
5101 static int
5102 arm_comp_type_attributes (const_tree type1, const_tree type2)
5103 {
5104 int l1, l2, s1, s2;
5105
5106 /* Check for mismatch of non-default calling convention. */
5107 if (TREE_CODE (type1) != FUNCTION_TYPE)
5108 return 1;
5109
5110 /* Check for mismatched call attributes. */
5111 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5112 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5113 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5114 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5115
5116 /* Only bother to check if an attribute is defined. */
5117 if (l1 | l2 | s1 | s2)
5118 {
5119 /* If one type has an attribute, the other must have the same attribute. */
5120 if ((l1 != l2) || (s1 != s2))
5121 return 0;
5122
5123 /* Disallow mixed attributes. */
5124 if ((l1 & s2) || (l2 & s1))
5125 return 0;
5126 }
5127
5128 /* Check for mismatched ISR attribute. */
5129 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5130 if (! l1)
5131 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5132 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5133 if (! l2)
5134 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5135 if (l1 != l2)
5136 return 0;
5137
5138 return 1;
5139 }
5140
5141 /* Assign default attributes to a newly defined type. This is used to
5142 set short_call/long_call attributes for function types of
5143 functions defined inside corresponding #pragma scopes. */
5144 static void
5145 arm_set_default_type_attributes (tree type)
5146 {
5147 /* Add __attribute__ ((long_call)) to all functions, when
5148 inside #pragma long_calls or __attribute__ ((short_call)),
5149 when inside #pragma no_long_calls. */
5150 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5151 {
5152 tree type_attr_list, attr_name;
5153 type_attr_list = TYPE_ATTRIBUTES (type);
5154
5155 if (arm_pragma_long_calls == LONG)
5156 attr_name = get_identifier ("long_call");
5157 else if (arm_pragma_long_calls == SHORT)
5158 attr_name = get_identifier ("short_call");
5159 else
5160 return;
5161
5162 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5163 TYPE_ATTRIBUTES (type) = type_attr_list;
5164 }
5165 }
5166 \f
5167 /* Return true if DECL is known to be linked into section SECTION. */
5168
5169 static bool
5170 arm_function_in_section_p (tree decl, section *section)
5171 {
5172 /* We can only be certain about functions defined in the same
5173 compilation unit. */
5174 if (!TREE_STATIC (decl))
5175 return false;
5176
5177 /* Make sure that SYMBOL always binds to the definition in this
5178 compilation unit. */
5179 if (!targetm.binds_local_p (decl))
5180 return false;
5181
5182 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5183 if (!DECL_SECTION_NAME (decl))
5184 {
5185 /* Make sure that we will not create a unique section for DECL. */
5186 if (flag_function_sections || DECL_ONE_ONLY (decl))
5187 return false;
5188 }
5189
5190 return function_section (decl) == section;
5191 }
5192
5193 /* Return nonzero if a 32-bit "long_call" should be generated for
5194 a call from the current function to DECL. We generate a long_call
5195 if the function:
5196
5197 a. has an __attribute__ ((long_call))
5198 or b. is within the scope of a #pragma long_calls
5199 or c. the -mlong-calls command line switch has been specified
5200
5201 However we do not generate a long call if the function:
5202
5203 d. has an __attribute__ ((short_call))
5204 or e. is inside the scope of a #pragma no_long_calls
5205 or f. is defined in the same section as the current function. */
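/* For example, declaring
       void far_away (void) __attribute__ ((long_call));
   makes calls to far_away load the target address into a register
   rather than using a direct BL, so the callee may be placed anywhere
   in the address space.  */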
5206
5207 bool
5208 arm_is_long_call_p (tree decl)
5209 {
5210 tree attrs;
5211
5212 if (!decl)
5213 return TARGET_LONG_CALLS;
5214
5215 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5216 if (lookup_attribute ("short_call", attrs))
5217 return false;
5218
5219 /* For "f", be conservative, and only cater for cases in which the
5220 whole of the current function is placed in the same section. */
5221 if (!flag_reorder_blocks_and_partition
5222 && TREE_CODE (decl) == FUNCTION_DECL
5223 && arm_function_in_section_p (decl, current_function_section ()))
5224 return false;
5225
5226 if (lookup_attribute ("long_call", attrs))
5227 return true;
5228
5229 return TARGET_LONG_CALLS;
5230 }
5231
5232 /* Return nonzero if it is ok to make a tail-call to DECL. */
5233 static bool
5234 arm_function_ok_for_sibcall (tree decl, tree exp)
5235 {
5236 unsigned long func_type;
5237
5238 if (cfun->machine->sibcall_blocked)
5239 return false;
5240
5241 /* Never tailcall something for which we have no decl, or if we
5242 are generating code for Thumb-1. */
5243 if (decl == NULL || TARGET_THUMB1)
5244 return false;
5245
5246 /* The PIC register is live on entry to VxWorks PLT entries, so we
5247 must make the call before restoring the PIC register. */
5248 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5249 return false;
5250
5251 /* Cannot tail-call to long calls, since these are out of range of
5252 a branch instruction. */
5253 if (arm_is_long_call_p (decl))
5254 return false;
5255
5256 /* If we are interworking and the function is not declared static
5257 then we can't tail-call it unless we know that it exists in this
5258 compilation unit (since it might be a Thumb routine). */
5259 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5260 return false;
5261
5262 func_type = arm_current_func_type ();
5263 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5264 if (IS_INTERRUPT (func_type))
5265 return false;
5266
5267 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5268 {
5269 /* Check that the return value locations are the same. For
5270 example that we aren't returning a value from the sibling in
5271 a VFP register but then need to transfer it to a core
5272 register. */
5273 rtx a, b;
5274
5275 a = arm_function_value (TREE_TYPE (exp), decl, false);
5276 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5277 cfun->decl, false);
5278 if (!rtx_equal_p (a, b))
5279 return false;
5280 }
5281
5282 /* Never tailcall if function may be called with a misaligned SP. */
5283 if (IS_STACKALIGN (func_type))
5284 return false;
5285
5286 /* The AAPCS says that, on bare-metal, calls to unresolved weak
5287 references should become a NOP. Don't convert such calls into
5288 sibling calls. */
5289 if (TARGET_AAPCS_BASED
5290 && arm_abi == ARM_ABI_AAPCS
5291 && DECL_WEAK (decl))
5292 return false;
5293
5294 /* Everything else is ok. */
5295 return true;
5296 }
5297
5298 \f
5299 /* Addressing mode support functions. */
5300
5301 /* Return nonzero if X is a legitimate immediate operand when compiling
5302 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5303 int
5304 legitimate_pic_operand_p (rtx x)
5305 {
5306 if (GET_CODE (x) == SYMBOL_REF
5307 || (GET_CODE (x) == CONST
5308 && GET_CODE (XEXP (x, 0)) == PLUS
5309 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5310 return 0;
5311
5312 return 1;
5313 }
5314
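/* For example, when compiling with -fpic or -fPIC an address constant
   such as "&some_global" (a SYMBOL_REF), or "&some_global + 4" (a CONST
   around a PLUS), is rejected by the test above and has to be loaded via
   the GOT, whereas a plain CONST_INT such as 42 remains a legitimate
   immediate.  "some_global" is an illustrative name only.  */
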
5315 /* Record that the current function needs a PIC register. Initialize
5316 cfun->machine->pic_reg if we have not already done so. */
5317
5318 static void
5319 require_pic_register (void)
5320 {
5321 /* A lot of the logic here is made obscure by the fact that this
5322 routine gets called as part of the rtx cost estimation process.
5323 We don't want those calls to affect any assumptions about the real
5324 function; and further, we can't call entry_of_function() until we
5325 start the real expansion process. */
5326 if (!crtl->uses_pic_offset_table)
5327 {
5328 gcc_assert (can_create_pseudo_p ());
5329 if (arm_pic_register != INVALID_REGNUM)
5330 {
5331 if (!cfun->machine->pic_reg)
5332 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5333
5334 /* Play games to avoid marking the function as needing pic
5335 if we are being called as part of the cost-estimation
5336 process. */
5337 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5338 crtl->uses_pic_offset_table = 1;
5339 }
5340 else
5341 {
5342 rtx seq, insn;
5343
5344 if (!cfun->machine->pic_reg)
5345 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5346
5347 /* Play games to avoid marking the function as needing pic
5348 if we are being called as part of the cost-estimation
5349 process. */
5350 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5351 {
5352 crtl->uses_pic_offset_table = 1;
5353 start_sequence ();
5354
5355 arm_load_pic_register (0UL);
5356
5357 seq = get_insns ();
5358 end_sequence ();
5359
5360 for (insn = seq; insn; insn = NEXT_INSN (insn))
5361 if (INSN_P (insn))
5362 INSN_LOCATOR (insn) = prologue_locator;
5363
5364 /* We can be called during expansion of PHI nodes, where
5365 we can't yet emit instructions directly in the final
5366 insn stream. Queue the insns on the entry edge, they will
5367 be committed after everything else is expanded. */
5368 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5369 }
5370 }
5371 }
5372 }
5373
5374 rtx
5375 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5376 {
5377 if (GET_CODE (orig) == SYMBOL_REF
5378 || GET_CODE (orig) == LABEL_REF)
5379 {
5380 rtx insn;
5381
5382 if (reg == 0)
5383 {
5384 gcc_assert (can_create_pseudo_p ());
5385 reg = gen_reg_rtx (Pmode);
5386 }
5387
5388 /* VxWorks does not impose a fixed gap between segments; the run-time
5389 gap can be different from the object-file gap. We therefore can't
5390 use GOTOFF unless we are absolutely sure that the symbol is in the
5391 same segment as the GOT. Unfortunately, the flexibility of linker
5392 scripts means that we can't be sure of that in general, so assume
5393 that GOTOFF is never valid on VxWorks. */
5394 if ((GET_CODE (orig) == LABEL_REF
5395 || (GET_CODE (orig) == SYMBOL_REF &&
5396 SYMBOL_REF_LOCAL_P (orig)))
5397 && NEED_GOT_RELOC
5398 && !TARGET_VXWORKS_RTP)
5399 insn = arm_pic_static_addr (orig, reg);
5400 else
5401 {
5402 rtx pat;
5403 rtx mem;
5404
5405 /* If this function doesn't have a pic register, create one now. */
5406 require_pic_register ();
5407
5408 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5409
5410 /* Make the MEM as close to a constant as possible. */
5411 mem = SET_SRC (pat);
5412 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5413 MEM_READONLY_P (mem) = 1;
5414 MEM_NOTRAP_P (mem) = 1;
5415
5416 insn = emit_insn (pat);
5417 }
5418
5419 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5420 by loop. */
5421 set_unique_reg_note (insn, REG_EQUAL, orig);
5422
5423 return reg;
5424 }
5425 else if (GET_CODE (orig) == CONST)
5426 {
5427 rtx base, offset;
5428
5429 if (GET_CODE (XEXP (orig, 0)) == PLUS
5430 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5431 return orig;
5432
5433 /* Handle the case where we have: const (UNSPEC_TLS). */
5434 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5435 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5436 return orig;
5437
5438 /* Handle the case where we have:
5439 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5440 CONST_INT. */
5441 if (GET_CODE (XEXP (orig, 0)) == PLUS
5442 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5443 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5444 {
5445 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
5446 return orig;
5447 }
5448
5449 if (reg == 0)
5450 {
5451 gcc_assert (can_create_pseudo_p ());
5452 reg = gen_reg_rtx (Pmode);
5453 }
5454
5455 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5456
5457 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5458 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5459 base == reg ? 0 : reg);
5460
5461 if (CONST_INT_P (offset))
5462 {
5463 /* The base register doesn't really matter, we only want to
5464 test the index for the appropriate mode. */
5465 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5466 {
5467 gcc_assert (can_create_pseudo_p ());
5468 offset = force_reg (Pmode, offset);
5469 }
5470
5471 if (CONST_INT_P (offset))
5472 return plus_constant (Pmode, base, INTVAL (offset));
5473 }
5474
5475 if (GET_MODE_SIZE (mode) > 4
5476 && (GET_MODE_CLASS (mode) == MODE_INT
5477 || TARGET_SOFT_FLOAT))
5478 {
5479 emit_insn (gen_addsi3 (reg, base, offset));
5480 return reg;
5481 }
5482
5483 return gen_rtx_PLUS (Pmode, base, offset);
5484 }
5485
5486 return orig;
5487 }
5488
5489
5490 /* Find a spare register to use during the prolog of a function. */
5491
5492 static int
5493 thumb_find_work_register (unsigned long pushed_regs_mask)
5494 {
5495 int reg;
5496
5497 /* Check the argument registers first as these are call-used. The
5498 register allocation order means that sometimes r3 might be used
5499 but earlier argument registers might not, so check them all. */
5500 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5501 if (!df_regs_ever_live_p (reg))
5502 return reg;
5503
5504 /* Before going on to check the call-saved registers we can try a couple
5505 more ways of deducing that r3 is available. The first is when we are
5506 pushing anonymous arguments onto the stack and we have less than 4
5507 registers worth of fixed arguments(*). In this case r3 will be part of
5508 the variable argument list and so we can be sure that it will be
5509 pushed right at the start of the function. Hence it will be available
5510 for the rest of the prologue.
5511 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5512 if (cfun->machine->uses_anonymous_args
5513 && crtl->args.pretend_args_size > 0)
5514 return LAST_ARG_REGNUM;
5515
5516 /* The other case is when we have fixed arguments but less than 4 registers
5517 worth. In this case r3 might be used in the body of the function, but
5518 it is not being used to convey an argument into the function. In theory
5519 we could just check crtl->args.size to see how many bytes are
5520 being passed in argument registers, but it seems that it is unreliable.
5521 Sometimes it will have the value 0 when in fact arguments are being
5522 passed. (See testcase execute/20021111-1.c for an example). So we also
5523 check the args_info.nregs field as well. The problem with this field is
5524 that it makes no allowances for arguments that are passed to the
5525 function but which are not used. Hence we could miss an opportunity
5526 when a function has an unused argument in r3. But it is better to be
5527 safe than to be sorry. */
5528 if (! cfun->machine->uses_anonymous_args
5529 && crtl->args.size >= 0
5530 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5531 && crtl->args.info.nregs < 4)
5532 return LAST_ARG_REGNUM;
5533
5534 /* Otherwise look for a call-saved register that is going to be pushed. */
5535 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5536 if (pushed_regs_mask & (1 << reg))
5537 return reg;
5538
5539 if (TARGET_THUMB2)
5540 {
5541 /* Thumb-2 can use high regs. */
5542 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5543 if (pushed_regs_mask & (1 << reg))
5544 return reg;
5545 }
5546 /* Something went wrong - thumb_compute_save_reg_mask()
5547 should have arranged for a suitable register to be pushed. */
5548 gcc_unreachable ();
5549 }
5550
5551 static GTY(()) int pic_labelno;
5552
5553 /* Generate code to load the PIC register. In Thumb state a scratch
5554 low register is found via thumb_find_work_register (SAVED_REGS) when
one is needed. */
5555
5556 void
5557 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5558 {
5559 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5560
5561 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5562 return;
5563
5564 gcc_assert (flag_pic);
5565
5566 pic_reg = cfun->machine->pic_reg;
5567 if (TARGET_VXWORKS_RTP)
5568 {
5569 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5570 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5571 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5572
5573 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5574
5575 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5576 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5577 }
5578 else
5579 {
5580 /* We use an UNSPEC rather than a LABEL_REF because this label
5581 never appears in the code stream. */
5582
5583 labelno = GEN_INT (pic_labelno++);
5584 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5585 l1 = gen_rtx_CONST (VOIDmode, l1);
5586
5587 /* On the ARM the PC register contains 'dot + 8' at the time of the
5588 addition, on the Thumb it is 'dot + 4'. */
5589 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5590 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5591 UNSPEC_GOTSYM_OFF);
5592 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5593
5594 if (TARGET_32BIT)
5595 {
5596 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5597 }
5598 else /* TARGET_THUMB1 */
5599 {
5600 if (arm_pic_register != INVALID_REGNUM
5601 && REGNO (pic_reg) > LAST_LO_REGNUM)
5602 {
5603 /* We will have pushed the pic register, so we should always be
5604 able to find a work register. */
5605 pic_tmp = gen_rtx_REG (SImode,
5606 thumb_find_work_register (saved_regs));
5607 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5608 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5609 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5610 }
5611 else
5612 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5613 }
5614 }
5615
5616 /* Need to emit this whether or not we obey regdecls,
5617 since setjmp/longjmp can cause life info to screw up. */
5618 emit_use (pic_reg);
5619 }
5620
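/* For the non-VxWorks case above, the emitted sequence is roughly of the
   following shape; the register, label names and literal placement are
   illustrative, and newer cores may use a movw/movt pair instead of a
   literal load:

       ldr     rPIC, .LPIC_lit
     .LPIC0:
       add     rPIC, pc, rPIC        @ pc reads as .LPIC0 + 8 in ARM state
       ...
     .LPIC_lit:
       .word   _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)  */
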
5621 /* Generate code to load the address of a static var when flag_pic is set. */
5622 static rtx
5623 arm_pic_static_addr (rtx orig, rtx reg)
5624 {
5625 rtx l1, labelno, offset_rtx, insn;
5626
5627 gcc_assert (flag_pic);
5628
5629 /* We use an UNSPEC rather than a LABEL_REF because this label
5630 never appears in the code stream. */
5631 labelno = GEN_INT (pic_labelno++);
5632 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5633 l1 = gen_rtx_CONST (VOIDmode, l1);
5634
5635 /* On the ARM the PC register contains 'dot + 8' at the time of the
5636 addition, on the Thumb it is 'dot + 4'. */
5637 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5638 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5639 UNSPEC_SYMBOL_OFFSET);
5640 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5641
5642 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
5643 return insn;
5644 }
5645
5646 /* Return nonzero if X is valid as an ARM state addressing register. */
5647 static int
5648 arm_address_register_rtx_p (rtx x, int strict_p)
5649 {
5650 int regno;
5651
5652 if (!REG_P (x))
5653 return 0;
5654
5655 regno = REGNO (x);
5656
5657 if (strict_p)
5658 return ARM_REGNO_OK_FOR_BASE_P (regno);
5659
5660 return (regno <= LAST_ARM_REGNUM
5661 || regno >= FIRST_PSEUDO_REGISTER
5662 || regno == FRAME_POINTER_REGNUM
5663 || regno == ARG_POINTER_REGNUM);
5664 }
5665
5666 /* Return TRUE if this rtx is the difference of a symbol and a label,
5667 and will reduce to a PC-relative relocation in the object file.
5668 Expressions like this can be left alone when generating PIC, rather
5669 than forced through the GOT. */
5670 static int
5671 pcrel_constant_p (rtx x)
5672 {
5673 if (GET_CODE (x) == MINUS)
5674 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5675
5676 return FALSE;
5677 }
5678
5679 /* Return true if X will surely end up in an index register after next
5680 splitting pass. */
5681 static bool
5682 will_be_in_index_register (const_rtx x)
5683 {
5684 /* arm.md: calculate_pic_address will split this into a register. */
5685 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
5686 }
5687
5688 /* Return nonzero if X is a valid ARM state address operand. */
5689 int
5690 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5691 int strict_p)
5692 {
5693 bool use_ldrd;
5694 enum rtx_code code = GET_CODE (x);
5695
5696 if (arm_address_register_rtx_p (x, strict_p))
5697 return 1;
5698
5699 use_ldrd = (TARGET_LDRD
5700 && (mode == DImode
5701 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5702
5703 if (code == POST_INC || code == PRE_DEC
5704 || ((code == PRE_INC || code == POST_DEC)
5705 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5706 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5707
5708 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5709 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5710 && GET_CODE (XEXP (x, 1)) == PLUS
5711 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5712 {
5713 rtx addend = XEXP (XEXP (x, 1), 1);
5714
5715 /* Don't allow ldrd post increment by register because it's hard
5716 to fixup invalid register choices. */
5717 if (use_ldrd
5718 && GET_CODE (x) == POST_MODIFY
5719 && REG_P (addend))
5720 return 0;
5721
5722 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5723 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5724 }
5725
5726 /* After reload constants split into minipools will have addresses
5727 from a LABEL_REF. */
5728 else if (reload_completed
5729 && (code == LABEL_REF
5730 || (code == CONST
5731 && GET_CODE (XEXP (x, 0)) == PLUS
5732 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5733 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
5734 return 1;
5735
5736 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5737 return 0;
5738
5739 else if (code == PLUS)
5740 {
5741 rtx xop0 = XEXP (x, 0);
5742 rtx xop1 = XEXP (x, 1);
5743
5744 return ((arm_address_register_rtx_p (xop0, strict_p)
5745 && ((CONST_INT_P (xop1)
5746 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5747 || (!strict_p && will_be_in_index_register (xop1))))
5748 || (arm_address_register_rtx_p (xop1, strict_p)
5749 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5750 }
5751
5752 #if 0
5753 /* Reload currently can't handle MINUS, so disable this for now */
5754 else if (GET_CODE (x) == MINUS)
5755 {
5756 rtx xop0 = XEXP (x, 0);
5757 rtx xop1 = XEXP (x, 1);
5758
5759 return (arm_address_register_rtx_p (xop0, strict_p)
5760 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5761 }
5762 #endif
5763
5764 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5765 && code == SYMBOL_REF
5766 && CONSTANT_POOL_ADDRESS_P (x)
5767 && ! (flag_pic
5768 && symbol_mentioned_p (get_pool_constant (x))
5769 && ! pcrel_constant_p (get_pool_constant (x))))
5770 return 1;
5771
5772 return 0;
5773 }
5774
5775 /* Return nonzero if X is a valid Thumb-2 address operand. */
5776 static int
5777 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5778 {
5779 bool use_ldrd;
5780 enum rtx_code code = GET_CODE (x);
5781
5782 if (arm_address_register_rtx_p (x, strict_p))
5783 return 1;
5784
5785 use_ldrd = (TARGET_LDRD
5786 && (mode == DImode
5787 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5788
5789 if (code == POST_INC || code == PRE_DEC
5790 || ((code == PRE_INC || code == POST_DEC)
5791 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5792 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5793
5794 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5795 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5796 && GET_CODE (XEXP (x, 1)) == PLUS
5797 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5798 {
5799 /* Thumb-2 only has autoincrement by constant. */
5800 rtx addend = XEXP (XEXP (x, 1), 1);
5801 HOST_WIDE_INT offset;
5802
5803 if (!CONST_INT_P (addend))
5804 return 0;
5805
5806 offset = INTVAL(addend);
5807 if (GET_MODE_SIZE (mode) <= 4)
5808 return (offset > -256 && offset < 256);
5809
5810 return (use_ldrd && offset > -1024 && offset < 1024
5811 && (offset & 3) == 0);
5812 }
5813
5814 /* After reload constants split into minipools will have addresses
5815 from a LABEL_REF. */
5816 else if (reload_completed
5817 && (code == LABEL_REF
5818 || (code == CONST
5819 && GET_CODE (XEXP (x, 0)) == PLUS
5820 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5821 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
5822 return 1;
5823
5824 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5825 return 0;
5826
5827 else if (code == PLUS)
5828 {
5829 rtx xop0 = XEXP (x, 0);
5830 rtx xop1 = XEXP (x, 1);
5831
5832 return ((arm_address_register_rtx_p (xop0, strict_p)
5833 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5834 || (!strict_p && will_be_in_index_register (xop1))))
5835 || (arm_address_register_rtx_p (xop1, strict_p)
5836 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5837 }
5838
5839 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5840 && code == SYMBOL_REF
5841 && CONSTANT_POOL_ADDRESS_P (x)
5842 && ! (flag_pic
5843 && symbol_mentioned_p (get_pool_constant (x))
5844 && ! pcrel_constant_p (get_pool_constant (x))))
5845 return 1;
5846
5847 return 0;
5848 }
5849
5850 /* Return nonzero if INDEX is valid for an address index operand in
5851 ARM state. */
5852 static int
5853 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5854 int strict_p)
5855 {
5856 HOST_WIDE_INT range;
5857 enum rtx_code code = GET_CODE (index);
5858
5859 /* Standard coprocessor addressing modes. */
5860 if (TARGET_HARD_FLOAT
5861 && TARGET_VFP
5862 && (mode == SFmode || mode == DFmode))
5863 return (code == CONST_INT && INTVAL (index) < 1024
5864 && INTVAL (index) > -1024
5865 && (INTVAL (index) & 3) == 0);
5866
5867 /* For quad modes, we restrict the constant offset to be slightly less
5868 than what the instruction format permits. We do this because for
5869 quad mode moves, we will actually decompose them into two separate
5870 double-mode reads or writes. INDEX must therefore be a valid
5871 (double-mode) offset and so should INDEX+8. */
5872 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5873 return (code == CONST_INT
5874 && INTVAL (index) < 1016
5875 && INTVAL (index) > -1024
5876 && (INTVAL (index) & 3) == 0);
5877
5878 /* We have no such constraint on double mode offsets, so we permit the
5879 full range of the instruction format. */
5880 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5881 return (code == CONST_INT
5882 && INTVAL (index) < 1024
5883 && INTVAL (index) > -1024
5884 && (INTVAL (index) & 3) == 0);
5885
5886 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5887 return (code == CONST_INT
5888 && INTVAL (index) < 1024
5889 && INTVAL (index) > -1024
5890 && (INTVAL (index) & 3) == 0);
5891
5892 if (arm_address_register_rtx_p (index, strict_p)
5893 && (GET_MODE_SIZE (mode) <= 4))
5894 return 1;
5895
5896 if (mode == DImode || mode == DFmode)
5897 {
5898 if (code == CONST_INT)
5899 {
5900 HOST_WIDE_INT val = INTVAL (index);
5901
5902 if (TARGET_LDRD)
5903 return val > -256 && val < 256;
5904 else
5905 return val > -4096 && val < 4092;
5906 }
5907
5908 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5909 }
5910
5911 if (GET_MODE_SIZE (mode) <= 4
5912 && ! (arm_arch4
5913 && (mode == HImode
5914 || mode == HFmode
5915 || (mode == QImode && outer == SIGN_EXTEND))))
5916 {
5917 if (code == MULT)
5918 {
5919 rtx xiop0 = XEXP (index, 0);
5920 rtx xiop1 = XEXP (index, 1);
5921
5922 return ((arm_address_register_rtx_p (xiop0, strict_p)
5923 && power_of_two_operand (xiop1, SImode))
5924 || (arm_address_register_rtx_p (xiop1, strict_p)
5925 && power_of_two_operand (xiop0, SImode)));
5926 }
5927 else if (code == LSHIFTRT || code == ASHIFTRT
5928 || code == ASHIFT || code == ROTATERT)
5929 {
5930 rtx op = XEXP (index, 1);
5931
5932 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5933 && CONST_INT_P (op)
5934 && INTVAL (op) > 0
5935 && INTVAL (op) <= 31);
5936 }
5937 }
5938
5939 /* For ARM v4 we may be doing a sign-extend operation during the
5940 load. */
5941 if (arm_arch4)
5942 {
5943 if (mode == HImode
5944 || mode == HFmode
5945 || (outer == SIGN_EXTEND && mode == QImode))
5946 range = 256;
5947 else
5948 range = 4096;
5949 }
5950 else
5951 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5952
5953 return (code == CONST_INT
5954 && INTVAL (index) < range
5955 && INTVAL (index) > -range);
5956 }
5957
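/* As a rough illustration of the checks above, the following ARM-state
   address forms (GAS syntax, register numbers illustrative) use indices
   that are accepted for the given access:

     ldr   r0, [r1, r2]            register index
     ldr   r0, [r1, r2, lsl #2]    index scaled by a power of two
     ldr   r0, [r1, #4095]         word/byte: 12-bit immediate
     ldrh  r0, [r1, #255]          halfword on ARMv4+: 8-bit immediate
     vldr  d0, [r1, #1020]         VFP: multiple of 4 within +/-1020  */
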
5958 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5959 index operand, i.e. 1, 2, 4 or 8. */
5960 static bool
5961 thumb2_index_mul_operand (rtx op)
5962 {
5963 HOST_WIDE_INT val;
5964
5965 if (!CONST_INT_P (op))
5966 return false;
5967
5968 val = INTVAL(op);
5969 return (val == 1 || val == 2 || val == 4 || val == 8);
5970 }
5971
5972 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5973 static int
5974 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5975 {
5976 enum rtx_code code = GET_CODE (index);
5977
5978 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5979 /* Standard coprocessor addressing modes. */
5980 if (TARGET_HARD_FLOAT
5981 && TARGET_VFP
5982 && (mode == SFmode || mode == DFmode))
5983 return (code == CONST_INT && INTVAL (index) < 1024
5984 /* Thumb-2 allows only > -256 index range for its core register
5985 load/stores. Since we allow SF/DF in core registers, we have
5986 to use the intersection between -256~4096 (core) and -1024~1024
5987 (coprocessor). */
5988 && INTVAL (index) > -256
5989 && (INTVAL (index) & 3) == 0);
5990
5991 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5992 {
5993 /* For DImode assume values will usually live in core regs
5994 and only allow LDRD addressing modes. */
5995 if (!TARGET_LDRD || mode != DImode)
5996 return (code == CONST_INT
5997 && INTVAL (index) < 1024
5998 && INTVAL (index) > -1024
5999 && (INTVAL (index) & 3) == 0);
6000 }
6001
6002 /* For quad modes, we restrict the constant offset to be slightly less
6003 than what the instruction format permits. We do this because for
6004 quad mode moves, we will actually decompose them into two separate
6005 double-mode reads or writes. INDEX must therefore be a valid
6006 (double-mode) offset and so should INDEX+8. */
6007 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6008 return (code == CONST_INT
6009 && INTVAL (index) < 1016
6010 && INTVAL (index) > -1024
6011 && (INTVAL (index) & 3) == 0);
6012
6013 /* We have no such constraint on double mode offsets, so we permit the
6014 full range of the instruction format. */
6015 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6016 return (code == CONST_INT
6017 && INTVAL (index) < 1024
6018 && INTVAL (index) > -1024
6019 && (INTVAL (index) & 3) == 0);
6020
6021 if (arm_address_register_rtx_p (index, strict_p)
6022 && (GET_MODE_SIZE (mode) <= 4))
6023 return 1;
6024
6025 if (mode == DImode || mode == DFmode)
6026 {
6027 if (code == CONST_INT)
6028 {
6029 HOST_WIDE_INT val = INTVAL (index);
6030 /* ??? Can we assume ldrd for thumb2? */
6031 /* Thumb-2 ldrd only has reg+const addressing modes. */
6032 /* ldrd supports offsets of +-1020.
6033 However the ldr fallback does not. */
6034 return val > -256 && val < 256 && (val & 3) == 0;
6035 }
6036 else
6037 return 0;
6038 }
6039
6040 if (code == MULT)
6041 {
6042 rtx xiop0 = XEXP (index, 0);
6043 rtx xiop1 = XEXP (index, 1);
6044
6045 return ((arm_address_register_rtx_p (xiop0, strict_p)
6046 && thumb2_index_mul_operand (xiop1))
6047 || (arm_address_register_rtx_p (xiop1, strict_p)
6048 && thumb2_index_mul_operand (xiop0)));
6049 }
6050 else if (code == ASHIFT)
6051 {
6052 rtx op = XEXP (index, 1);
6053
6054 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6055 && CONST_INT_P (op)
6056 && INTVAL (op) > 0
6057 && INTVAL (op) <= 3);
6058 }
6059
6060 return (code == CONST_INT
6061 && INTVAL (index) < 4096
6062 && INTVAL (index) > -256);
6063 }
6064
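/* As a rough illustration of the checks above, these Thumb-2 forms
   (GAS syntax, register numbers illustrative) use accepted indices:

     ldr   r0, [r1, r2, lsl #3]    register index shifted by 0-3
     ldr   r0, [r1, #4095]         positive offsets up to 12 bits
     ldr   r0, [r1, #-255]         negative offsets limited to 8 bits
     ldrd  r0, r1, [r2, #252]      DImode: word-aligned, within +/-255  */
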
6065 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6066 static int
6067 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6068 {
6069 int regno;
6070
6071 if (!REG_P (x))
6072 return 0;
6073
6074 regno = REGNO (x);
6075
6076 if (strict_p)
6077 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6078
6079 return (regno <= LAST_LO_REGNUM
6080 || regno > LAST_VIRTUAL_REGISTER
6081 || regno == FRAME_POINTER_REGNUM
6082 || (GET_MODE_SIZE (mode) >= 4
6083 && (regno == STACK_POINTER_REGNUM
6084 || regno >= FIRST_PSEUDO_REGISTER
6085 || x == hard_frame_pointer_rtx
6086 || x == arg_pointer_rtx)));
6087 }
6088
6089 /* Return nonzero if x is a legitimate index register. This is the case
6090 for any base register that can access a QImode object. */
6091 inline static int
6092 thumb1_index_register_rtx_p (rtx x, int strict_p)
6093 {
6094 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6095 }
6096
6097 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6098
6099 The AP may be eliminated to either the SP or the FP, so we use the
6100 least common denominator, e.g. SImode, and offsets from 0 to 64.
6101
6102 ??? Verify whether the above is the right approach.
6103
6104 ??? Also, the FP may be eliminated to the SP, so perhaps that
6105 needs special handling also.
6106
6107 ??? Look at how the mips16 port solves this problem. It probably uses
6108 better ways to solve some of these problems.
6109
6110 Although it is not incorrect, we don't accept QImode and HImode
6111 addresses based on the frame pointer or arg pointer until the
6112 reload pass starts. This is so that eliminating such addresses
6113 into stack based ones won't produce impossible code. */
6114 int
6115 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6116 {
6117 /* ??? Not clear if this is right. Experiment. */
6118 if (GET_MODE_SIZE (mode) < 4
6119 && !(reload_in_progress || reload_completed)
6120 && (reg_mentioned_p (frame_pointer_rtx, x)
6121 || reg_mentioned_p (arg_pointer_rtx, x)
6122 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6123 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6124 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6125 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6126 return 0;
6127
6128 /* Accept any base register. SP only in SImode or larger. */
6129 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6130 return 1;
6131
6132 /* This is PC relative data before arm_reorg runs. */
6133 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6134 && GET_CODE (x) == SYMBOL_REF
6135 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6136 return 1;
6137
6138 /* This is PC relative data after arm_reorg runs. */
6139 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6140 && reload_completed
6141 && (GET_CODE (x) == LABEL_REF
6142 || (GET_CODE (x) == CONST
6143 && GET_CODE (XEXP (x, 0)) == PLUS
6144 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6145 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6146 return 1;
6147
6148 /* Post-inc indexing only supported for SImode and larger. */
6149 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6150 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6151 return 1;
6152
6153 else if (GET_CODE (x) == PLUS)
6154 {
6155 /* REG+REG address can be any two index registers. */
6156 /* We disallow FRAME+REG addressing since we know that FRAME
6157 will be replaced with STACK, and SP relative addressing only
6158 permits SP+OFFSET. */
6159 if (GET_MODE_SIZE (mode) <= 4
6160 && XEXP (x, 0) != frame_pointer_rtx
6161 && XEXP (x, 1) != frame_pointer_rtx
6162 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6163 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6164 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6165 return 1;
6166
6167 /* REG+const has 5-7 bit offset for non-SP registers. */
6168 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6169 || XEXP (x, 0) == arg_pointer_rtx)
6170 && CONST_INT_P (XEXP (x, 1))
6171 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6172 return 1;
6173
6174 /* REG+const has 10-bit offset for SP, but only SImode and
6175 larger is supported. */
6176 /* ??? Should probably check for DI/DFmode overflow here
6177 just like GO_IF_LEGITIMATE_OFFSET does. */
6178 else if (REG_P (XEXP (x, 0))
6179 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6180 && GET_MODE_SIZE (mode) >= 4
6181 && CONST_INT_P (XEXP (x, 1))
6182 && INTVAL (XEXP (x, 1)) >= 0
6183 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6184 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6185 return 1;
6186
6187 else if (REG_P (XEXP (x, 0))
6188 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6189 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6190 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6191 && REGNO (XEXP (x, 0))
6192 <= LAST_VIRTUAL_POINTER_REGISTER))
6193 && GET_MODE_SIZE (mode) >= 4
6194 && CONST_INT_P (XEXP (x, 1))
6195 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6196 return 1;
6197 }
6198
6199 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6200 && GET_MODE_SIZE (mode) == 4
6201 && GET_CODE (x) == SYMBOL_REF
6202 && CONSTANT_POOL_ADDRESS_P (x)
6203 && ! (flag_pic
6204 && symbol_mentioned_p (get_pool_constant (x))
6205 && ! pcrel_constant_p (get_pool_constant (x))))
6206 return 1;
6207
6208 return 0;
6209 }
6210
6211 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6212 instruction of mode MODE. */
6213 int
6214 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6215 {
6216 switch (GET_MODE_SIZE (mode))
6217 {
6218 case 1:
6219 return val >= 0 && val < 32;
6220
6221 case 2:
6222 return val >= 0 && val < 64 && (val & 1) == 0;
6223
6224 default:
6225 return (val >= 0
6226 && (val + GET_MODE_SIZE (mode)) <= 128
6227 && (val & 3) == 0);
6228 }
6229 }
6230
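/* For example, the limits above correspond to these maximal Thumb-1
   immediate-offset forms (register numbers illustrative):

     ldrb  r0, [r1, #31]     byte:     0..31
     ldrh  r0, [r1, #62]     halfword: 0..62, even
     ldr   r0, [r1, #124]    word:     0..124, multiple of 4  */
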
6231 bool
6232 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6233 {
6234 if (TARGET_ARM)
6235 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6236 else if (TARGET_THUMB2)
6237 return thumb2_legitimate_address_p (mode, x, strict_p);
6238 else /* if (TARGET_THUMB1) */
6239 return thumb1_legitimate_address_p (mode, x, strict_p);
6240 }
6241
6242 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
6243
6244 Given an rtx X being reloaded into a reg required to be
6245 in class CLASS, return the class of reg to actually use.
6246 In general this is just CLASS, but for the Thumb core registers and
6247 immediate constants we prefer a LO_REGS class or a subset. */
6248
6249 static reg_class_t
6250 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
6251 {
6252 if (TARGET_32BIT)
6253 return rclass;
6254 else
6255 {
6256 if (rclass == GENERAL_REGS
6257 || rclass == HI_REGS
6258 || rclass == NO_REGS
6259 || rclass == STACK_REG)
6260 return LO_REGS;
6261 else
6262 return rclass;
6263 }
6264 }
6265
6266 /* Build the SYMBOL_REF for __tls_get_addr. */
6267
6268 static GTY(()) rtx tls_get_addr_libfunc;
6269
6270 static rtx
6271 get_tls_get_addr (void)
6272 {
6273 if (!tls_get_addr_libfunc)
6274 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6275 return tls_get_addr_libfunc;
6276 }
6277
6278 static rtx
6279 arm_load_tp (rtx target)
6280 {
6281 if (!target)
6282 target = gen_reg_rtx (SImode);
6283
6284 if (TARGET_HARD_TP)
6285 {
6286 /* Can return in any reg. */
6287 emit_insn (gen_load_tp_hard (target));
6288 }
6289 else
6290 {
6291 /* Always returned in r0. Immediately copy the result into a pseudo,
6292 otherwise other uses of r0 (e.g. setting up function arguments) may
6293 clobber the value. */
6294
6295 rtx tmp;
6296
6297 emit_insn (gen_load_tp_soft ());
6298
6299 tmp = gen_rtx_REG (SImode, 0);
6300 emit_move_insn (target, tmp);
6301 }
6302 return target;
6303 }
6304
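/* For example, with -mtp=cp15 (TARGET_HARD_TP) the thread-pointer read
   above is a single coprocessor move, roughly:

     mrc   p15, 0, r0, c13, c0, 3   @ read TPIDRURO into r0

   while the soft variant calls the __aeabi_read_tp helper, which returns
   the thread pointer in r0.  The destination register is illustrative.  */
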
6305 static rtx
6306 load_tls_operand (rtx x, rtx reg)
6307 {
6308 rtx tmp;
6309
6310 if (reg == NULL_RTX)
6311 reg = gen_reg_rtx (SImode);
6312
6313 tmp = gen_rtx_CONST (SImode, x);
6314
6315 emit_move_insn (reg, tmp);
6316
6317 return reg;
6318 }
6319
6320 static rtx
6321 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6322 {
6323 rtx insns, label, labelno, sum;
6324
6325 gcc_assert (reloc != TLS_DESCSEQ);
6326 start_sequence ();
6327
6328 labelno = GEN_INT (pic_labelno++);
6329 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6330 label = gen_rtx_CONST (VOIDmode, label);
6331
6332 sum = gen_rtx_UNSPEC (Pmode,
6333 gen_rtvec (4, x, GEN_INT (reloc), label,
6334 GEN_INT (TARGET_ARM ? 8 : 4)),
6335 UNSPEC_TLS);
6336 reg = load_tls_operand (sum, reg);
6337
6338 if (TARGET_ARM)
6339 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6340 else
6341 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6342
6343 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6344 LCT_PURE, /* LCT_CONST? */
6345 Pmode, 1, reg, Pmode);
6346
6347 insns = get_insns ();
6348 end_sequence ();
6349
6350 return insns;
6351 }
6352
6353 static rtx
6354 arm_tls_descseq_addr (rtx x, rtx reg)
6355 {
6356 rtx labelno = GEN_INT (pic_labelno++);
6357 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6358 rtx sum = gen_rtx_UNSPEC (Pmode,
6359 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6360 gen_rtx_CONST (VOIDmode, label),
6361 GEN_INT (!TARGET_ARM)),
6362 UNSPEC_TLS);
6363 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6364
6365 emit_insn (gen_tlscall (x, labelno));
6366 if (!reg)
6367 reg = gen_reg_rtx (SImode);
6368 else
6369 gcc_assert (REGNO (reg) != 0);
6370
6371 emit_move_insn (reg, reg0);
6372
6373 return reg;
6374 }
6375
6376 rtx
6377 legitimize_tls_address (rtx x, rtx reg)
6378 {
6379 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6380 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6381
6382 switch (model)
6383 {
6384 case TLS_MODEL_GLOBAL_DYNAMIC:
6385 if (TARGET_GNU2_TLS)
6386 {
6387 reg = arm_tls_descseq_addr (x, reg);
6388
6389 tp = arm_load_tp (NULL_RTX);
6390
6391 dest = gen_rtx_PLUS (Pmode, tp, reg);
6392 }
6393 else
6394 {
6395 /* Original scheme */
6396 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6397 dest = gen_reg_rtx (Pmode);
6398 emit_libcall_block (insns, dest, ret, x);
6399 }
6400 return dest;
6401
6402 case TLS_MODEL_LOCAL_DYNAMIC:
6403 if (TARGET_GNU2_TLS)
6404 {
6405 reg = arm_tls_descseq_addr (x, reg);
6406
6407 tp = arm_load_tp (NULL_RTX);
6408
6409 dest = gen_rtx_PLUS (Pmode, tp, reg);
6410 }
6411 else
6412 {
6413 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6414
6415 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6416 share the LDM result with other LD model accesses. */
6417 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6418 UNSPEC_TLS);
6419 dest = gen_reg_rtx (Pmode);
6420 emit_libcall_block (insns, dest, ret, eqv);
6421
6422 /* Load the addend. */
6423 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
6424 GEN_INT (TLS_LDO32)),
6425 UNSPEC_TLS);
6426 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6427 dest = gen_rtx_PLUS (Pmode, dest, addend);
6428 }
6429 return dest;
6430
6431 case TLS_MODEL_INITIAL_EXEC:
6432 labelno = GEN_INT (pic_labelno++);
6433 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6434 label = gen_rtx_CONST (VOIDmode, label);
6435 sum = gen_rtx_UNSPEC (Pmode,
6436 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6437 GEN_INT (TARGET_ARM ? 8 : 4)),
6438 UNSPEC_TLS);
6439 reg = load_tls_operand (sum, reg);
6440
6441 if (TARGET_ARM)
6442 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6443 else if (TARGET_THUMB2)
6444 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6445 else
6446 {
6447 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6448 emit_move_insn (reg, gen_const_mem (SImode, reg));
6449 }
6450
6451 tp = arm_load_tp (NULL_RTX);
6452
6453 return gen_rtx_PLUS (Pmode, tp, reg);
6454
6455 case TLS_MODEL_LOCAL_EXEC:
6456 tp = arm_load_tp (NULL_RTX);
6457
6458 reg = gen_rtx_UNSPEC (Pmode,
6459 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6460 UNSPEC_TLS);
6461 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6462
6463 return gen_rtx_PLUS (Pmode, tp, reg);
6464
6465 default:
6466 abort ();
6467 }
6468 }
6469
6470 /* Try machine-dependent ways of modifying an illegitimate address
6471 to be legitimate. If we find one, return the new, valid address. */
6472 rtx
6473 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6474 {
6475 if (!TARGET_ARM)
6476 {
6477 /* TODO: legitimize_address for Thumb2. */
6478 if (TARGET_THUMB2)
6479 return x;
6480 return thumb_legitimize_address (x, orig_x, mode);
6481 }
6482
6483 if (arm_tls_symbol_p (x))
6484 return legitimize_tls_address (x, NULL_RTX);
6485
6486 if (GET_CODE (x) == PLUS)
6487 {
6488 rtx xop0 = XEXP (x, 0);
6489 rtx xop1 = XEXP (x, 1);
6490
6491 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6492 xop0 = force_reg (SImode, xop0);
6493
6494 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6495 xop1 = force_reg (SImode, xop1);
6496
6497 if (ARM_BASE_REGISTER_RTX_P (xop0)
6498 && CONST_INT_P (xop1))
6499 {
6500 HOST_WIDE_INT n, low_n;
6501 rtx base_reg, val;
6502 n = INTVAL (xop1);
6503
6504 /* VFP addressing modes actually allow greater offsets, but for
6505 now we just stick with the lowest common denominator. */
6506 if (mode == DImode
6507 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6508 {
6509 low_n = n & 0x0f;
6510 n &= ~0x0f;
6511 if (low_n > 4)
6512 {
6513 n += 16;
6514 low_n -= 16;
6515 }
6516 }
6517 else
6518 {
6519 low_n = ((mode) == TImode ? 0
6520 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6521 n -= low_n;
6522 }
6523
6524 base_reg = gen_reg_rtx (SImode);
6525 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
6526 emit_move_insn (base_reg, val);
6527 x = plus_constant (Pmode, base_reg, low_n);
6528 }
6529 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6530 x = gen_rtx_PLUS (SImode, xop0, xop1);
6531 }
6532
6533 /* XXX We don't allow MINUS any more -- see comment in
6534 arm_legitimate_address_outer_p (). */
6535 else if (GET_CODE (x) == MINUS)
6536 {
6537 rtx xop0 = XEXP (x, 0);
6538 rtx xop1 = XEXP (x, 1);
6539
6540 if (CONSTANT_P (xop0))
6541 xop0 = force_reg (SImode, xop0);
6542
6543 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6544 xop1 = force_reg (SImode, xop1);
6545
6546 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6547 x = gen_rtx_MINUS (SImode, xop0, xop1);
6548 }
6549
6550 /* Make sure to take full advantage of the pre-indexed addressing mode
6551 with absolute addresses, which often allows the base register to be
6552 factorized for multiple adjacent memory references, and might even
6553 allow the minipool to be avoided entirely. */
6554 else if (CONST_INT_P (x) && optimize > 0)
6555 {
6556 unsigned int bits;
6557 HOST_WIDE_INT mask, base, index;
6558 rtx base_reg;
6559
6560 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6561 use an 8-bit index. So let's use a 12-bit index for SImode only and
6562 hope that arm_gen_constant will enable ldrb to use more bits. */
6563 bits = (mode == SImode) ? 12 : 8;
6564 mask = (1 << bits) - 1;
6565 base = INTVAL (x) & ~mask;
6566 index = INTVAL (x) & mask;
6567 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6568 {
6569 /* It'll most probably be more efficient to generate the base
6570 with more bits set and use a negative index instead. */
6571 base |= mask;
6572 index -= mask;
6573 }
6574 base_reg = force_reg (SImode, GEN_INT (base));
6575 x = plus_constant (Pmode, base_reg, index);
6576 }
6577
6578 if (flag_pic)
6579 {
6580 /* We need to find and carefully transform any SYMBOL and LABEL
6581 references; so go back to the original address expression. */
6582 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6583
6584 if (new_x != orig_x)
6585 x = new_x;
6586 }
6587
6588 return x;
6589 }
6590
6591
6592 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6593 to be legitimate. If we find one, return the new, valid address. */
6594 rtx
6595 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6596 {
6597 if (arm_tls_symbol_p (x))
6598 return legitimize_tls_address (x, NULL_RTX);
6599
6600 if (GET_CODE (x) == PLUS
6601 && CONST_INT_P (XEXP (x, 1))
6602 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6603 || INTVAL (XEXP (x, 1)) < 0))
6604 {
6605 rtx xop0 = XEXP (x, 0);
6606 rtx xop1 = XEXP (x, 1);
6607 HOST_WIDE_INT offset = INTVAL (xop1);
6608
6609 /* Try and fold the offset into a biasing of the base register and
6610 then offsetting that. Don't do this when optimizing for space
6611 since it can cause too many CSEs. */
6612 if (optimize_size && offset >= 0
6613 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6614 {
6615 HOST_WIDE_INT delta;
6616
6617 if (offset >= 256)
6618 delta = offset - (256 - GET_MODE_SIZE (mode));
6619 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6620 delta = 31 * GET_MODE_SIZE (mode);
6621 else
6622 delta = offset & (~31 * GET_MODE_SIZE (mode));
6623
6624 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
6625 NULL_RTX);
6626 x = plus_constant (Pmode, xop0, delta);
6627 }
6628 else if (offset < 0 && offset > -256)
6629 /* Small negative offsets are best done with a subtract before the
6630 dereference; forcing these into a register normally takes two
6631 instructions. */
6632 x = force_operand (x, NULL_RTX);
6633 else
6634 {
6635 /* For the remaining cases, force the constant into a register. */
6636 xop1 = force_reg (SImode, xop1);
6637 x = gen_rtx_PLUS (SImode, xop0, xop1);
6638 }
6639 }
6640 else if (GET_CODE (x) == PLUS
6641 && s_register_operand (XEXP (x, 1), SImode)
6642 && !s_register_operand (XEXP (x, 0), SImode))
6643 {
6644 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6645
6646 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6647 }
6648
6649 if (flag_pic)
6650 {
6651 /* We need to find and carefully transform any SYMBOL and LABEL
6652 references; so go back to the original address expression. */
6653 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6654
6655 if (new_x != orig_x)
6656 x = new_x;
6657 }
6658
6659 return x;
6660 }
6661
6662 bool
6663 arm_legitimize_reload_address (rtx *p,
6664 enum machine_mode mode,
6665 int opnum, int type,
6666 int ind_levels ATTRIBUTE_UNUSED)
6667 {
6668 /* We must recognize output that we have already generated ourselves. */
6669 if (GET_CODE (*p) == PLUS
6670 && GET_CODE (XEXP (*p, 0)) == PLUS
6671 && REG_P (XEXP (XEXP (*p, 0), 0))
6672 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
6673 && CONST_INT_P (XEXP (*p, 1)))
6674 {
6675 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6676 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6677 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6678 return true;
6679 }
6680
6681 if (GET_CODE (*p) == PLUS
6682 && REG_P (XEXP (*p, 0))
6683 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6684 /* If the base register is equivalent to a constant, let the generic
6685 code handle it. Otherwise we will run into problems if a future
6686 reload pass decides to rematerialize the constant. */
6687 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
6688 && CONST_INT_P (XEXP (*p, 1)))
6689 {
6690 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6691 HOST_WIDE_INT low, high;
6692
6693 /* Detect coprocessor load/stores. */
6694 bool coproc_p = ((TARGET_HARD_FLOAT
6695 && TARGET_VFP
6696 && (mode == SFmode || mode == DFmode))
6697 || (TARGET_REALLY_IWMMXT
6698 && VALID_IWMMXT_REG_MODE (mode))
6699 || (TARGET_NEON
6700 && (VALID_NEON_DREG_MODE (mode)
6701 || VALID_NEON_QREG_MODE (mode))));
6702
6703 /* For some conditions, bail out when lower two bits are unaligned. */
6704 if ((val & 0x3) != 0
6705 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6706 && (coproc_p
6707 /* For DI, and DF under soft-float: */
6708 || ((mode == DImode || mode == DFmode)
6709 /* Without ldrd, we use stm/ldm, which does not
6710 fare well with unaligned bits. */
6711 && (! TARGET_LDRD
6712 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6713 || TARGET_THUMB2))))
6714 return false;
6715
6716 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6717 of which the (reg+high) gets turned into a reload add insn,
6718 we try to decompose the index into high/low values that can often
6719 also lead to better reload CSE.
6720 For example:
6721 ldr r0, [r2, #4100] // Offset too large
6722 ldr r1, [r2, #4104] // Offset too large
6723
6724 is best reloaded as:
6725 add t1, r2, #4096
6726 ldr r0, [t1, #4]
6727 add t2, r2, #4096
6728 ldr r1, [t2, #8]
6729
6730 which post-reload CSE can simplify in most cases to eliminate the
6731 second add instruction:
6732 add t1, r2, #4096
6733 ldr r0, [t1, #4]
6734 ldr r1, [t1, #8]
6735
6736 The idea here is that we want to split out the bits of the constant
6737 as a mask, rather than subtracting the maximum offset that the
6738 respective type of load/store used can handle.
6739
6740 A negative low part can still be used even if the overall offset is
6741 positive; sometimes this may lead to an immediate that can be
6742 constructed with fewer instructions.
6743 For example:
6744 ldr r0, [r2, #0x3FFFFC]
6745
6746 This is best reloaded as:
6747 add t1, r2, #0x400000
6748 ldr r0, [t1, #-4]
6749
6750 The trick for spotting this for a load insn with N bits of offset
6751 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6752 negative offset that is going to make bit N and all the bits below
6753 it become zero in the remainder part.
6754
6755 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6756 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6757 used in most cases of ARM load/store instructions. */
6758
6759 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6760 (((VAL) & ((1 << (N)) - 1)) \
6761 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6762 : 0)
6763
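/* As a worked example matching the 0x3FFFFC case above: with VAL =
   0x3FFFFC and N = 12, the low 13 bits are 0x1FFC and bit 12 is set,
   so the macro yields 0xFFC - 0x1000 = -4; the remaining high part,
   0x400000, can then be added with a single instruction.  */
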
6764 if (coproc_p)
6765 {
6766 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6767
6768 /* NEON quad-word load/stores are made of two double-word accesses,
6769 so the valid index range is reduced by 8. Treat as 9-bit range if
6770 we go over it. */
6771 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6772 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6773 }
6774 else if (GET_MODE_SIZE (mode) == 8)
6775 {
6776 if (TARGET_LDRD)
6777 low = (TARGET_THUMB2
6778 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6779 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6780 else
6781 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6782 to access doublewords. The supported load/store offsets are
6783 -8, -4, and 4, which we try to produce here. */
6784 low = ((val & 0xf) ^ 0x8) - 0x8;
6785 }
6786 else if (GET_MODE_SIZE (mode) < 8)
6787 {
6788 /* NEON element load/stores do not have an offset. */
6789 if (TARGET_NEON_FP16 && mode == HFmode)
6790 return false;
6791
6792 if (TARGET_THUMB2)
6793 {
6794 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6795 Try the wider 12-bit range first, and re-try if the result
6796 is out of range. */
6797 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6798 if (low < -255)
6799 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6800 }
6801 else
6802 {
6803 if (mode == HImode || mode == HFmode)
6804 {
6805 if (arm_arch4)
6806 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6807 else
6808 {
6809 /* The storehi/movhi_bytes fallbacks can use only
6810 [-4094,+4094] of the full ldrb/strb index range. */
6811 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6812 if (low == 4095 || low == -4095)
6813 return false;
6814 }
6815 }
6816 else
6817 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6818 }
6819 }
6820 else
6821 return false;
6822
6823 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6824 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6825 - (unsigned HOST_WIDE_INT) 0x80000000);
6826 /* Check for overflow or zero */
6827 if (low == 0 || high == 0 || (high + low != val))
6828 return false;
6829
6830 /* Reload the high part into a base reg; leave the low part
6831 in the mem. */
6832 *p = gen_rtx_PLUS (GET_MODE (*p),
6833 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6834 GEN_INT (high)),
6835 GEN_INT (low));
6836 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6837 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6838 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6839 return true;
6840 }
6841
6842 return false;
6843 }
6844
6845 rtx
6846 thumb_legitimize_reload_address (rtx *x_p,
6847 enum machine_mode mode,
6848 int opnum, int type,
6849 int ind_levels ATTRIBUTE_UNUSED)
6850 {
6851 rtx x = *x_p;
6852
6853 if (GET_CODE (x) == PLUS
6854 && GET_MODE_SIZE (mode) < 4
6855 && REG_P (XEXP (x, 0))
6856 && XEXP (x, 0) == stack_pointer_rtx
6857 && CONST_INT_P (XEXP (x, 1))
6858 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6859 {
6860 rtx orig_x = x;
6861
6862 x = copy_rtx (x);
6863 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6864 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6865 return x;
6866 }
6867
6868 /* If both registers are hi-regs, then it's better to reload the
6869 entire expression rather than each register individually. That
6870 only requires one reload register rather than two. */
6871 if (GET_CODE (x) == PLUS
6872 && REG_P (XEXP (x, 0))
6873 && REG_P (XEXP (x, 1))
6874 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6875 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6876 {
6877 rtx orig_x = x;
6878
6879 x = copy_rtx (x);
6880 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6881 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6882 return x;
6883 }
6884
6885 return NULL;
6886 }
6887
6888 /* Test for various thread-local symbols. */
6889
6890 /* Return TRUE if X is a thread-local symbol. */
6891
6892 static bool
6893 arm_tls_symbol_p (rtx x)
6894 {
6895 if (! TARGET_HAVE_TLS)
6896 return false;
6897
6898 if (GET_CODE (x) != SYMBOL_REF)
6899 return false;
6900
6901 return SYMBOL_REF_TLS_MODEL (x) != 0;
6902 }
6903
6904 /* Helper for arm_tls_referenced_p. */
6905
6906 static int
6907 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6908 {
6909 if (GET_CODE (*x) == SYMBOL_REF)
6910 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6911
6912 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6913 TLS offsets, not real symbol references. */
6914 if (GET_CODE (*x) == UNSPEC
6915 && XINT (*x, 1) == UNSPEC_TLS)
6916 return -1;
6917
6918 return 0;
6919 }
6920
6921 /* Return TRUE if X contains any TLS symbol references. */
6922
6923 bool
6924 arm_tls_referenced_p (rtx x)
6925 {
6926 if (! TARGET_HAVE_TLS)
6927 return false;
6928
6929 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6930 }
6931
6932 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6933
6934 On the ARM, allow any integer (invalid ones are removed later by insn
6935 patterns), nice doubles and symbol_refs which refer to the function's
6936 constant pool XXX.
6937
6938 When generating pic allow anything. */
6939
6940 static bool
6941 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
6942 {
6943 /* At present, we have no support for Neon structure constants, so forbid
6944 them here. It might be possible to handle simple cases like 0 and -1
6945 in future. */
6946 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
6947 return false;
6948
6949 return flag_pic || !label_mentioned_p (x);
6950 }
6951
6952 static bool
6953 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6954 {
6955 return (CONST_INT_P (x)
6956 || CONST_DOUBLE_P (x)
6957 || CONSTANT_ADDRESS_P (x)
6958 || flag_pic);
6959 }
6960
6961 static bool
6962 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
6963 {
6964 return (!arm_cannot_force_const_mem (mode, x)
6965 && (TARGET_32BIT
6966 ? arm_legitimate_constant_p_1 (mode, x)
6967 : thumb_legitimate_constant_p (mode, x)));
6968 }
6969
6970 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6971
6972 static bool
6973 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6974 {
6975 rtx base, offset;
6976
6977 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6978 {
6979 split_const (x, &base, &offset);
6980 if (GET_CODE (base) == SYMBOL_REF
6981 && !offset_within_block_p (base, INTVAL (offset)))
6982 return true;
6983 }
6984 return arm_tls_referenced_p (x);
6985 }
6986 \f
6987 #define REG_OR_SUBREG_REG(X) \
6988 (REG_P (X) \
6989 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
6990
6991 #define REG_OR_SUBREG_RTX(X) \
6992 (REG_P (X) ? (X) : SUBREG_REG (X))
6993
6994 static inline int
6995 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6996 {
6997 enum machine_mode mode = GET_MODE (x);
6998 int total;
6999
7000 switch (code)
7001 {
7002 case ASHIFT:
7003 case ASHIFTRT:
7004 case LSHIFTRT:
7005 case ROTATERT:
7006 case PLUS:
7007 case MINUS:
7008 case COMPARE:
7009 case NEG:
7010 case NOT:
7011 return COSTS_N_INSNS (1);
7012
7013 case MULT:
7014 if (CONST_INT_P (XEXP (x, 1)))
7015 {
7016 int cycles = 0;
7017 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7018
7019 while (i)
7020 {
7021 i >>= 2;
7022 cycles++;
7023 }
7024 return COSTS_N_INSNS (2) + cycles;
7025 }
7026 return COSTS_N_INSNS (1) + 16;
7027
7028 case SET:
7029 return (COSTS_N_INSNS (1)
7030 + 4 * ((MEM_P (SET_SRC (x)))
7031 + MEM_P (SET_DEST (x))));
7032
7033 case CONST_INT:
7034 if (outer == SET)
7035 {
7036 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7037 return 0;
7038 if (thumb_shiftable_const (INTVAL (x)))
7039 return COSTS_N_INSNS (2);
7040 return COSTS_N_INSNS (3);
7041 }
7042 else if ((outer == PLUS || outer == COMPARE)
7043 && INTVAL (x) < 256 && INTVAL (x) > -256)
7044 return 0;
7045 else if ((outer == IOR || outer == XOR || outer == AND)
7046 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7047 return COSTS_N_INSNS (1);
7048 else if (outer == AND)
7049 {
7050 int i;
7051 /* This duplicates the tests in the andsi3 expander. */
7052 for (i = 9; i <= 31; i++)
7053 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7054 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7055 return COSTS_N_INSNS (2);
7056 }
7057 else if (outer == ASHIFT || outer == ASHIFTRT
7058 || outer == LSHIFTRT)
7059 return 0;
7060 return COSTS_N_INSNS (2);
7061
7062 case CONST:
7063 case CONST_DOUBLE:
7064 case LABEL_REF:
7065 case SYMBOL_REF:
7066 return COSTS_N_INSNS (3);
7067
7068 case UDIV:
7069 case UMOD:
7070 case DIV:
7071 case MOD:
7072 return 100;
7073
7074 case TRUNCATE:
7075 return 99;
7076
7077 case AND:
7078 case XOR:
7079 case IOR:
7080 /* XXX guess. */
7081 return 8;
7082
7083 case MEM:
7084 /* XXX another guess. */
7085 /* Memory costs quite a lot for the first word, but subsequent words
7086 load at the equivalent of a single insn each. */
7087 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7088 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7089 ? 4 : 0));
7090
7091 case IF_THEN_ELSE:
7092 /* XXX a guess. */
7093 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7094 return 14;
7095 return 2;
7096
7097 case SIGN_EXTEND:
7098 case ZERO_EXTEND:
7099 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7100 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7101
7102 if (mode == SImode)
7103 return total;
7104
7105 if (arm_arch6)
7106 return total + COSTS_N_INSNS (1);
7107
7108 /* Assume a two-shift sequence. Increase the cost slightly so
7109 we prefer actual shifts over an extend operation. */
7110 return total + 1 + COSTS_N_INSNS (2);
7111
7112 default:
7113 return 99;
7114 }
7115 }
7116
7117 static inline bool
7118 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7119 {
7120 enum machine_mode mode = GET_MODE (x);
7121 enum rtx_code subcode;
7122 rtx operand;
7123 enum rtx_code code = GET_CODE (x);
7124 *total = 0;
7125
7126 switch (code)
7127 {
7128 case MEM:
7129 /* Memory costs quite a lot for the first word, but subsequent words
7130 load at the equivalent of a single insn each. */
7131 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7132 return true;
7133
7134 case DIV:
7135 case MOD:
7136 case UDIV:
7137 case UMOD:
7138 if (TARGET_HARD_FLOAT && mode == SFmode)
7139 *total = COSTS_N_INSNS (2);
7140 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7141 *total = COSTS_N_INSNS (4);
7142 else
7143 *total = COSTS_N_INSNS (20);
7144 return false;
7145
7146 case ROTATE:
7147 if (REG_P (XEXP (x, 1)))
7148 *total = COSTS_N_INSNS (1); /* Need to subtract from 32. */
7149 else if (!CONST_INT_P (XEXP (x, 1)))
7150 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
7151
7152 /* Fall through */
7153 case ROTATERT:
7154 if (mode != SImode)
7155 {
7156 *total += COSTS_N_INSNS (4);
7157 return true;
7158 }
7159
7160 /* Fall through */
7161 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
7162 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7163 if (mode == DImode)
7164 {
7165 *total += COSTS_N_INSNS (3);
7166 return true;
7167 }
7168
7169 *total += COSTS_N_INSNS (1);
7170 /* Increase the cost of complex shifts because they aren't any faster,
7171 and reduce dual issue opportunities. */
7172 if (arm_tune_cortex_a9
7173 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
7174 ++*total;
7175
7176 return true;
7177
7178 case MINUS:
7179 if (mode == DImode)
7180 {
7181 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7182 if (CONST_INT_P (XEXP (x, 0))
7183 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7184 {
7185 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7186 return true;
7187 }
7188
7189 if (CONST_INT_P (XEXP (x, 1))
7190 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7191 {
7192 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7193 return true;
7194 }
7195
7196 return false;
7197 }
7198
7199 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7200 {
7201 if (TARGET_HARD_FLOAT
7202 && (mode == SFmode
7203 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7204 {
7205 *total = COSTS_N_INSNS (1);
7206 if (CONST_DOUBLE_P (XEXP (x, 0))
7207 && arm_const_double_rtx (XEXP (x, 0)))
7208 {
7209 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7210 return true;
7211 }
7212
7213 if (CONST_DOUBLE_P (XEXP (x, 1))
7214 && arm_const_double_rtx (XEXP (x, 1)))
7215 {
7216 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7217 return true;
7218 }
7219
7220 return false;
7221 }
7222 *total = COSTS_N_INSNS (20);
7223 return false;
7224 }
7225
7226 *total = COSTS_N_INSNS (1);
7227 if (CONST_INT_P (XEXP (x, 0))
7228 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7229 {
7230 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7231 return true;
7232 }
7233
7234 subcode = GET_CODE (XEXP (x, 1));
7235 if (subcode == ASHIFT || subcode == ASHIFTRT
7236 || subcode == LSHIFTRT
7237 || subcode == ROTATE || subcode == ROTATERT)
7238 {
7239 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7240 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7241 return true;
7242 }
7243
7244 /* A shift as a part of RSB costs no more than RSB itself. */
7245 if (GET_CODE (XEXP (x, 0)) == MULT
7246 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7247 {
7248 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7249 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7250 return true;
7251 }
7252
7253 if (subcode == MULT
7254 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7255 {
7256 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7257 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7258 return true;
7259 }
7260
7261 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7262 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7263 {
7264 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7265 if (REG_P (XEXP (XEXP (x, 1), 0))
7266 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7267 *total += COSTS_N_INSNS (1);
7268
7269 return true;
7270 }
7271
7272 /* Fall through */
7273
7274 case PLUS:
7275 if (code == PLUS && arm_arch6 && mode == SImode
7276 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7277 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7278 {
7279 *total = COSTS_N_INSNS (1);
7280 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7281 0, speed);
7282 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7283 return true;
7284 }
7285
7286 /* MLA: All arguments must be registers. We filter out
7287 multiplication by a power of two, so that we fall through to
7288 the code below. */
7289 if (GET_CODE (XEXP (x, 0)) == MULT
7290 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7291 {
7292 /* The cost comes from the cost of the multiply. */
7293 return false;
7294 }
7295
7296 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7297 {
7298 if (TARGET_HARD_FLOAT
7299 && (mode == SFmode
7300 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7301 {
7302 *total = COSTS_N_INSNS (1);
7303 if (CONST_DOUBLE_P (XEXP (x, 1))
7304 && arm_const_double_rtx (XEXP (x, 1)))
7305 {
7306 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7307 return true;
7308 }
7309
7310 return false;
7311 }
7312
7313 *total = COSTS_N_INSNS (20);
7314 return false;
7315 }
7316
7317 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7318 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7319 {
7320 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7321 if (REG_P (XEXP (XEXP (x, 0), 0))
7322 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7323 *total += COSTS_N_INSNS (1);
7324 return true;
7325 }
7326
7327 /* Fall through */
7328
7329 case AND: case XOR: case IOR:
7330
7331 /* Normally the frame registers will be split into reg+const during
7332 reload, so it is a bad idea to combine them with other instructions,
7333 since then they might not be moved outside of loops. As a compromise
7334 we allow integration with ops that have a constant as their second
7335 operand. */
7336 if (REG_OR_SUBREG_REG (XEXP (x, 0))
7337 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7338 && !CONST_INT_P (XEXP (x, 1)))
7339 *total = COSTS_N_INSNS (1);
7340
7341 if (mode == DImode)
7342 {
7343 *total += COSTS_N_INSNS (2);
7344 if (CONST_INT_P (XEXP (x, 1))
7345 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7346 {
7347 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7348 return true;
7349 }
7350
7351 return false;
7352 }
7353
7354 *total += COSTS_N_INSNS (1);
7355 if (CONST_INT_P (XEXP (x, 1))
7356 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7357 {
7358 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7359 return true;
7360 }
7361 subcode = GET_CODE (XEXP (x, 0));
7362 if (subcode == ASHIFT || subcode == ASHIFTRT
7363 || subcode == LSHIFTRT
7364 || subcode == ROTATE || subcode == ROTATERT)
7365 {
7366 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7367 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7368 return true;
7369 }
7370
7371 if (subcode == MULT
7372 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7373 {
7374 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7375 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7376 return true;
7377 }
7378
7379 if (subcode == UMIN || subcode == UMAX
7380 || subcode == SMIN || subcode == SMAX)
7381 {
7382 *total = COSTS_N_INSNS (3);
7383 return true;
7384 }
7385
7386 return false;
7387
7388 case MULT:
7389 /* This should have been handled by the CPU specific routines. */
7390 gcc_unreachable ();
7391
7392 case TRUNCATE:
7393 if (arm_arch3m && mode == SImode
7394 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7395 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7396 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7397 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7398 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7399 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7400 {
7401 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7402 return true;
7403 }
7404 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT. */
7405 return false;
7406
7407 case NEG:
7408 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7409 {
7410 if (TARGET_HARD_FLOAT
7411 && (mode == SFmode
7412 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7413 {
7414 *total = COSTS_N_INSNS (1);
7415 return false;
7416 }
7417 *total = COSTS_N_INSNS (2);
7418 return false;
7419 }
7420
7421 /* Fall through */
7422 case NOT:
7423 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7424 if (mode == SImode && code == NOT)
7425 {
7426 subcode = GET_CODE (XEXP (x, 0));
7427 if (subcode == ASHIFT || subcode == ASHIFTRT
7428 || subcode == LSHIFTRT
7429 || subcode == ROTATE || subcode == ROTATERT
7430 || (subcode == MULT
7431 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7432 {
7433 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7434 /* Register shifts cost an extra cycle. */
7435 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
7436 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7437 subcode, 1, speed);
7438 return true;
7439 }
7440 }
7441
7442 return false;
7443
7444 case IF_THEN_ELSE:
7445 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7446 {
7447 *total = COSTS_N_INSNS (4);
7448 return true;
7449 }
7450
7451 operand = XEXP (x, 0);
7452
7453 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7454 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7455 && REG_P (XEXP (operand, 0))
7456 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7457 *total += COSTS_N_INSNS (1);
7458 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
7459 + rtx_cost (XEXP (x, 2), code, 2, speed));
7460 return true;
7461
7462 case NE:
7463 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7464 {
7465 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7466 return true;
7467 }
7468 goto scc_insn;
7469
7470 case GE:
7471 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
7472 && mode == SImode && XEXP (x, 1) == const0_rtx)
7473 {
7474 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7475 return true;
7476 }
7477 goto scc_insn;
7478
7479 case LT:
7480 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
7481 && mode == SImode && XEXP (x, 1) == const0_rtx)
7482 {
7483 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7484 return true;
7485 }
7486 goto scc_insn;
7487
7488 case EQ:
7489 case GT:
7490 case LE:
7491 case GEU:
7492 case LTU:
7493 case GTU:
7494 case LEU:
7495 case UNORDERED:
7496 case ORDERED:
7497 case UNEQ:
7498 case UNGE:
7499 case UNLT:
7500 case UNGT:
7501 case UNLE:
7502 scc_insn:
7503 /* SCC insns. In the case where the comparison has already been
7504 performed, then they cost 2 instructions. Otherwise they need
7505 an additional comparison before them. */
7506 *total = COSTS_N_INSNS (2);
7507 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
7508 {
7509 return true;
7510 }
7511
7512 /* Fall through */
7513 case COMPARE:
7514 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
7515 {
7516 *total = 0;
7517 return true;
7518 }
7519
7520 *total += COSTS_N_INSNS (1);
7521 if (CONST_INT_P (XEXP (x, 1))
7522 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7523 {
7524 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7525 return true;
7526 }
7527
7528 subcode = GET_CODE (XEXP (x, 0));
7529 if (subcode == ASHIFT || subcode == ASHIFTRT
7530 || subcode == LSHIFTRT
7531 || subcode == ROTATE || subcode == ROTATERT)
7532 {
7533 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7534 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7535 return true;
7536 }
7537
7538 if (subcode == MULT
7539 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7540 {
7541 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7542 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7543 return true;
7544 }
7545
7546 return false;
7547
7548 case UMIN:
7549 case UMAX:
7550 case SMIN:
7551 case SMAX:
7552 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7553 if (!CONST_INT_P (XEXP (x, 1))
7554 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7555 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7556 return true;
7557
7558 case ABS:
7559 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7560 {
7561 if (TARGET_HARD_FLOAT
7562 && (mode == SFmode
7563 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7564 {
7565 *total = COSTS_N_INSNS (1);
7566 return false;
7567 }
7568 *total = COSTS_N_INSNS (20);
7569 return false;
7570 }
7571 *total = COSTS_N_INSNS (1);
7572 if (mode == DImode)
7573 *total += COSTS_N_INSNS (3);
7574 return false;
7575
7576 case SIGN_EXTEND:
7577 case ZERO_EXTEND:
7578 *total = 0;
7579 if (GET_MODE_CLASS (mode) == MODE_INT)
7580 {
7581 rtx op = XEXP (x, 0);
7582 enum machine_mode opmode = GET_MODE (op);
7583
7584 if (mode == DImode)
7585 *total += COSTS_N_INSNS (1);
7586
7587 if (opmode != SImode)
7588 {
7589 if (MEM_P (op))
7590 {
7591 /* If !arm_arch4, we use one of the extendhisi2_mem
7592 or movhi_bytes patterns for HImode. For a QImode
7593 sign extension, we first zero-extend from memory
7594 and then perform a shift sequence. */
7595 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7596 *total += COSTS_N_INSNS (2);
7597 }
7598 else if (arm_arch6)
7599 *total += COSTS_N_INSNS (1);
7600
7601 /* We don't have the necessary insn, so we need to perform some
7602 other operation. */
7603 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7604 /* An and with constant 255. */
7605 *total += COSTS_N_INSNS (1);
7606 else
7607 /* A shift sequence. Increase costs slightly to avoid
7608 combining two shifts into an extend operation. */
7609 *total += COSTS_N_INSNS (2) + 1;
7610 }
7611
7612 return false;
7613 }
7614
7615 switch (GET_MODE (XEXP (x, 0)))
7616 {
7617 case V8QImode:
7618 case V4HImode:
7619 case V2SImode:
7620 case V4QImode:
7621 case V2HImode:
7622 *total = COSTS_N_INSNS (1);
7623 return false;
7624
7625 default:
7626 gcc_unreachable ();
7627 }
7628 gcc_unreachable ();
7629
7630 case ZERO_EXTRACT:
7631 case SIGN_EXTRACT:
7632 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7633 return true;
7634
7635 case CONST_INT:
7636 if (const_ok_for_arm (INTVAL (x))
7637 || const_ok_for_arm (~INTVAL (x)))
7638 *total = COSTS_N_INSNS (1);
7639 else
7640 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7641 INTVAL (x), NULL_RTX,
7642 NULL_RTX, 0, 0));
7643 return true;
7644
7645 case CONST:
7646 case LABEL_REF:
7647 case SYMBOL_REF:
7648 *total = COSTS_N_INSNS (3);
7649 return true;
7650
7651 case HIGH:
7652 *total = COSTS_N_INSNS (1);
7653 return true;
7654
7655 case LO_SUM:
7656 *total = COSTS_N_INSNS (1);
7657 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7658 return true;
7659
7660 case CONST_DOUBLE:
7661 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7662 && (mode == SFmode || !TARGET_VFP_SINGLE))
7663 *total = COSTS_N_INSNS (1);
7664 else
7665 *total = COSTS_N_INSNS (4);
7666 return true;
7667
7668 case SET:
7669 /* The vec_extract patterns accept memory operands that require an
7670 address reload. Account for the cost of that reload to give the
7671 auto-inc-dec pass an incentive to try to replace them. */
7672 if (TARGET_NEON && MEM_P (SET_DEST (x))
7673 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
7674 {
7675 *total = rtx_cost (SET_DEST (x), code, 0, speed);
7676 if (!neon_vector_mem_operand (SET_DEST (x), 2))
7677 *total += COSTS_N_INSNS (1);
7678 return true;
7679 }
7680 /* Likewise for the vec_set patterns. */
7681 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
7682 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
7683 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
7684 {
7685 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
7686 *total = rtx_cost (mem, code, 0, speed);
7687 if (!neon_vector_mem_operand (mem, 2))
7688 *total += COSTS_N_INSNS (1);
7689 return true;
7690 }
7691 return false;
7692
7693 case UNSPEC:
7694 /* We cost this as high as our memory costs to allow this to
7695 be hoisted from loops. */
7696 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
7697 {
7698 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7699 }
7700 return true;
7701
7702 case CONST_VECTOR:
7703 if (TARGET_NEON
7704 && TARGET_HARD_FLOAT
7705 && outer == SET
7706 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
7707 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
7708 *total = COSTS_N_INSNS (1);
7709 else
7710 *total = COSTS_N_INSNS (4);
7711 return true;
7712
7713 default:
7714 *total = COSTS_N_INSNS (4);
7715 return false;
7716 }
7717 }
7718
7719 /* Estimates the size cost of thumb1 instructions.
7720 For now most of the code is copied from thumb1_rtx_costs. We need more
7721 fine-grained tuning when we have more related test cases. */
7722 static inline int
7723 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7724 {
7725 enum machine_mode mode = GET_MODE (x);
7726
7727 switch (code)
7728 {
7729 case ASHIFT:
7730 case ASHIFTRT:
7731 case LSHIFTRT:
7732 case ROTATERT:
7733 case PLUS:
7734 case MINUS:
7735 case COMPARE:
7736 case NEG:
7737 case NOT:
7738 return COSTS_N_INSNS (1);
7739
7740 case MULT:
7741 if (CONST_INT_P (XEXP (x, 1)))
7742 {
7743 /* The Thumb-1 mul instruction cannot operate on a constant; we must
7744 load it into a register first. */
7745 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7746 return COSTS_N_INSNS (1) + const_size;
7747 }
7748 return COSTS_N_INSNS (1);
7749
7750 case SET:
7751 return (COSTS_N_INSNS (1)
7752 + 4 * ((MEM_P (SET_SRC (x)))
7753 + MEM_P (SET_DEST (x))));
7754
7755 case CONST_INT:
7756 if (outer == SET)
7757 {
7758 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7759 return COSTS_N_INSNS (1);
7760 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7761 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7762 return COSTS_N_INSNS (2);
7763 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7764 if (thumb_shiftable_const (INTVAL (x)))
7765 return COSTS_N_INSNS (2);
7766 return COSTS_N_INSNS (3);
7767 }
7768 else if ((outer == PLUS || outer == COMPARE)
7769 && INTVAL (x) < 256 && INTVAL (x) > -256)
7770 return 0;
7771 else if ((outer == IOR || outer == XOR || outer == AND)
7772 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7773 return COSTS_N_INSNS (1);
7774 else if (outer == AND)
7775 {
7776 int i;
7777 /* This duplicates the tests in the andsi3 expander. */
7778 for (i = 9; i <= 31; i++)
7779 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7780 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7781 return COSTS_N_INSNS (2);
7782 }
7783 else if (outer == ASHIFT || outer == ASHIFTRT
7784 || outer == LSHIFTRT)
7785 return 0;
7786 return COSTS_N_INSNS (2);
7787
7788 case CONST:
7789 case CONST_DOUBLE:
7790 case LABEL_REF:
7791 case SYMBOL_REF:
7792 return COSTS_N_INSNS (3);
7793
7794 case UDIV:
7795 case UMOD:
7796 case DIV:
7797 case MOD:
7798 return 100;
7799
7800 case TRUNCATE:
7801 return 99;
7802
7803 case AND:
7804 case XOR:
7805 case IOR:
7806 /* XXX guess. */
7807 return 8;
7808
7809 case MEM:
7810 /* XXX another guess. */
7811 /* Memory costs quite a lot for the first word, but subsequent words
7812 load at the equivalent of a single insn each. */
7813 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7814 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7815 ? 4 : 0));
7816
7817 case IF_THEN_ELSE:
7818 /* XXX a guess. */
7819 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7820 return 14;
7821 return 2;
7822
7823 case ZERO_EXTEND:
7824 /* XXX still guessing. */
7825 switch (GET_MODE (XEXP (x, 0)))
7826 {
7827 case QImode:
7828 return (1 + (mode == DImode ? 4 : 0)
7829 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
7830
7831 case HImode:
7832 return (4 + (mode == DImode ? 4 : 0)
7833 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
7834
7835 case SImode:
7836 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
7837
7838 default:
7839 return 99;
7840 }
7841
7842 default:
7843 return 99;
7844 }
7845 }
7846
7847 /* RTX costs when optimizing for size. */
7848 static bool
7849 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7850 int *total)
7851 {
7852 enum machine_mode mode = GET_MODE (x);
7853 if (TARGET_THUMB1)
7854 {
7855 *total = thumb1_size_rtx_costs (x, code, outer_code);
7856 return true;
7857 }
7858
7859 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7860 switch (code)
7861 {
7862 case MEM:
7863 /* A memory access costs 1 insn if the mode is small, or the address is
7864 a single register, otherwise it costs one insn per word. */
7865 if (REG_P (XEXP (x, 0)))
7866 *total = COSTS_N_INSNS (1);
7867 else if (flag_pic
7868 && GET_CODE (XEXP (x, 0)) == PLUS
7869 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7870 /* This will be split into two instructions.
7871 See arm.md:calculate_pic_address. */
7872 *total = COSTS_N_INSNS (2);
7873 else
7874 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7875 return true;
7876
7877 case DIV:
7878 case MOD:
7879 case UDIV:
7880 case UMOD:
7881 /* Needs a libcall, so it costs about this. */
7882 *total = COSTS_N_INSNS (2);
7883 return false;
7884
7885 case ROTATE:
7886 if (mode == SImode && REG_P (XEXP (x, 1)))
7887 {
7888 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
7889 return true;
7890 }
7891 /* Fall through */
7892 case ROTATERT:
7893 case ASHIFT:
7894 case LSHIFTRT:
7895 case ASHIFTRT:
7896 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
7897 {
7898 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
7899 return true;
7900 }
7901 else if (mode == SImode)
7902 {
7903 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
7904 /* Slightly disparage register shifts, but not by much. */
7905 if (!CONST_INT_P (XEXP (x, 1)))
7906 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
7907 return true;
7908 }
7909
7910 /* Needs a libcall. */
7911 *total = COSTS_N_INSNS (2);
7912 return false;
7913
7914 case MINUS:
7915 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7916 && (mode == SFmode || !TARGET_VFP_SINGLE))
7917 {
7918 *total = COSTS_N_INSNS (1);
7919 return false;
7920 }
7921
7922 if (mode == SImode)
7923 {
7924 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7925 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7926
7927 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7928 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7929 || subcode1 == ROTATE || subcode1 == ROTATERT
7930 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7931 || subcode1 == ASHIFTRT)
7932 {
7933 /* It's just the cost of the two operands. */
7934 *total = 0;
7935 return false;
7936 }
7937
7938 *total = COSTS_N_INSNS (1);
7939 return false;
7940 }
7941
7942 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7943 return false;
7944
7945 case PLUS:
7946 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7947 && (mode == SFmode || !TARGET_VFP_SINGLE))
7948 {
7949 *total = COSTS_N_INSNS (1);
7950 return false;
7951 }
7952
7953 /* A shift as a part of ADD costs nothing. */
7954 if (GET_CODE (XEXP (x, 0)) == MULT
7955 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7956 {
7957 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7958 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
7959 *total += rtx_cost (XEXP (x, 1), code, 1, false);
7960 return true;
7961 }
7962
7963 /* Fall through */
7964 case AND: case XOR: case IOR:
7965 if (mode == SImode)
7966 {
7967 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7968
7969 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7970 || subcode == LSHIFTRT || subcode == ASHIFTRT
7971 || (code == AND && subcode == NOT))
7972 {
7973 /* It's just the cost of the two operands. */
7974 *total = 0;
7975 return false;
7976 }
7977 }
7978
7979 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7980 return false;
7981
7982 case MULT:
7983 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7984 return false;
7985
7986 case NEG:
7987 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7988 && (mode == SFmode || !TARGET_VFP_SINGLE))
7989 {
7990 *total = COSTS_N_INSNS (1);
7991 return false;
7992 }
7993
7994 /* Fall through */
7995 case NOT:
7996 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7997
7998 return false;
7999
8000 case IF_THEN_ELSE:
8001 *total = 0;
8002 return false;
8003
8004 case COMPARE:
8005 if (cc_register (XEXP (x, 0), VOIDmode))
8006 * total = 0;
8007 else
8008 *total = COSTS_N_INSNS (1);
8009 return false;
8010
8011 case ABS:
8012 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8013 && (mode == SFmode || !TARGET_VFP_SINGLE))
8014 *total = COSTS_N_INSNS (1);
8015 else
8016 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
8017 return false;
8018
8019 case SIGN_EXTEND:
8020 case ZERO_EXTEND:
8021 return arm_rtx_costs_1 (x, outer_code, total, 0);
8022
8023 case CONST_INT:
8024 if (const_ok_for_arm (INTVAL (x)))
8025 /* A multiplication by a constant requires another instruction
8026 to load the constant to a register. */
8027 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
8028 ? 1 : 0);
8029 else if (const_ok_for_arm (~INTVAL (x)))
8030 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
8031 else if (const_ok_for_arm (-INTVAL (x)))
8032 {
8033 if (outer_code == COMPARE || outer_code == PLUS
8034 || outer_code == MINUS)
8035 *total = 0;
8036 else
8037 *total = COSTS_N_INSNS (1);
8038 }
8039 else
8040 *total = COSTS_N_INSNS (2);
8041 return true;
8042
8043 case CONST:
8044 case LABEL_REF:
8045 case SYMBOL_REF:
8046 *total = COSTS_N_INSNS (2);
8047 return true;
8048
8049 case CONST_DOUBLE:
8050 *total = COSTS_N_INSNS (4);
8051 return true;
8052
8053 case CONST_VECTOR:
8054 if (TARGET_NEON
8055 && TARGET_HARD_FLOAT
8056 && outer_code == SET
8057 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8058 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8059 *total = COSTS_N_INSNS (1);
8060 else
8061 *total = COSTS_N_INSNS (4);
8062 return true;
8063
8064 case HIGH:
8065 case LO_SUM:
8066 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8067 cost of these slightly. */
8068 *total = COSTS_N_INSNS (1) + 1;
8069 return true;
8070
8071 case SET:
8072 return false;
8073
8074 default:
8075 if (mode != VOIDmode)
8076 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8077 else
8078 *total = COSTS_N_INSNS (4); /* Who knows? */
8079 return false;
8080 }
8081 }
8082
8083 /* RTX costs: dispatch to the size costs or the per-tuning speed costs. */
8084 static bool
8085 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
8086 int *total, bool speed)
8087 {
8088 if (!speed)
8089 return arm_size_rtx_costs (x, (enum rtx_code) code,
8090 (enum rtx_code) outer_code, total);
8091 else
8092 return current_tune->rtx_costs (x, (enum rtx_code) code,
8093 (enum rtx_code) outer_code,
8094 total, speed);
8095 }
8096
8097 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
8098 supported on any "slowmul" cores, so it can be ignored. */
8099
8100 static bool
8101 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8102 int *total, bool speed)
8103 {
8104 enum machine_mode mode = GET_MODE (x);
8105
8106 if (TARGET_THUMB)
8107 {
8108 *total = thumb1_rtx_costs (x, code, outer_code);
8109 return true;
8110 }
8111
8112 switch (code)
8113 {
8114 case MULT:
8115 if (GET_MODE_CLASS (mode) == MODE_FLOAT
8116 || mode == DImode)
8117 {
8118 *total = COSTS_N_INSNS (20);
8119 return false;
8120 }
8121
8122 if (CONST_INT_P (XEXP (x, 1)))
8123 {
8124 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8125 & (unsigned HOST_WIDE_INT) 0xffffffff);
8126 int cost, const_ok = const_ok_for_arm (i);
8127 int j, booth_unit_size;
8128
8129 /* Tune as appropriate. */
8130 cost = const_ok ? 4 : 8;
8131 booth_unit_size = 2;
8132 for (j = 0; i && j < 32; j += booth_unit_size)
8133 {
8134 i >>= booth_unit_size;
8135 cost++;
8136 }
8137
8138 *total = COSTS_N_INSNS (cost);
8139 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8140 return true;
8141 }
8142
8143 *total = COSTS_N_INSNS (20);
8144 return false;
8145
8146 default:
8147 return arm_rtx_costs_1 (x, outer_code, total, speed);
8148 }
8149 }
8150
8151
8152 /* RTX cost for cores with a fast multiply unit (M variants). */
8153
8154 static bool
8155 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8156 int *total, bool speed)
8157 {
8158 enum machine_mode mode = GET_MODE (x);
8159
8160 if (TARGET_THUMB1)
8161 {
8162 *total = thumb1_rtx_costs (x, code, outer_code);
8163 return true;
8164 }
8165
8166 /* ??? should thumb2 use different costs? */
8167 switch (code)
8168 {
8169 case MULT:
8170 /* There is no point basing this on the tuning, since it is always the
8171 fast variant if it exists at all. */
8172 if (mode == DImode
8173 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8174 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8175 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8176 {
8177 *total = COSTS_N_INSNS (2);
8178 return false;
8179 }
8180
8181
8182 if (mode == DImode)
8183 {
8184 *total = COSTS_N_INSNS (5);
8185 return false;
8186 }
8187
8188 if (CONST_INT_P (XEXP (x, 1)))
8189 {
8190 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8191 & (unsigned HOST_WIDE_INT) 0xffffffff);
8192 int cost, const_ok = const_ok_for_arm (i);
8193 int j, booth_unit_size;
8194
8195 /* Tune as appropriate. */
8196 cost = const_ok ? 4 : 8;
8197 booth_unit_size = 8;
8198 for (j = 0; i && j < 32; j += booth_unit_size)
8199 {
8200 i >>= booth_unit_size;
8201 cost++;
8202 }
8203
8204 *total = COSTS_N_INSNS (cost);
8205 return false;
8206 }
8207
8208 if (mode == SImode)
8209 {
8210 *total = COSTS_N_INSNS (4);
8211 return false;
8212 }
8213
8214 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8215 {
8216 if (TARGET_HARD_FLOAT
8217 && (mode == SFmode
8218 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8219 {
8220 *total = COSTS_N_INSNS (1);
8221 return false;
8222 }
8223 }
8224
8225 /* Requires a lib call. */
8226 *total = COSTS_N_INSNS (20);
8227 return false;
8228
8229 default:
8230 return arm_rtx_costs_1 (x, outer_code, total, speed);
8231 }
8232 }
8233
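/* An illustrative sketch (the helper name is hypothetical and nothing in
   this file calls it) of the constant-multiply cost estimate used by the
   slowmul and fastmul cases above: start from 4 when the constant is a
   valid ARM immediate (8 otherwise) and add one for each BOOTH_UNIT_SIZE
   bits of the constant consumed, where the slowmul tuning steps by 2 bits
   and the fastmul tuning by 8.  For example, a fastmul multiply by 0xff is
   estimated at 4 + 1 = 5 insn-equivalents.  */
static int
arm_booth_mult_cost_sketch (unsigned int val, int const_ok,
                            int booth_unit_size)
{
  int cost = const_ok ? 4 : 8;
  int j;

  for (j = 0; val && j < 32; j += booth_unit_size)
    {
      val >>= booth_unit_size;
      cost++;
    }
  return cost;
}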
8234
8235 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
8236 so it can be ignored. */
8237
8238 static bool
8239 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8240 int *total, bool speed)
8241 {
8242 enum machine_mode mode = GET_MODE (x);
8243
8244 if (TARGET_THUMB)
8245 {
8246 *total = thumb1_rtx_costs (x, code, outer_code);
8247 return true;
8248 }
8249
8250 switch (code)
8251 {
8252 case COMPARE:
8253 if (GET_CODE (XEXP (x, 0)) != MULT)
8254 return arm_rtx_costs_1 (x, outer_code, total, speed);
8255
8256 /* A COMPARE of a MULT is slow on XScale; the muls instruction
8257 will stall until the multiplication is complete. */
8258 *total = COSTS_N_INSNS (3);
8259 return false;
8260
8261 case MULT:
8262 /* There is no point basing this on the tuning, since it is always the
8263 fast variant if it exists at all. */
8264 if (mode == DImode
8265 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8266 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8267 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8268 {
8269 *total = COSTS_N_INSNS (2);
8270 return false;
8271 }
8272
8273
8274 if (mode == DImode)
8275 {
8276 *total = COSTS_N_INSNS (5);
8277 return false;
8278 }
8279
8280 if (CONST_INT_P (XEXP (x, 1)))
8281 {
8282 /* If operand 1 is a constant we can more accurately
8283 calculate the cost of the multiply. The multiplier can
8284 retire 15 bits on the first cycle and a further 12 on the
8285 second. We do, of course, have to load the constant into
8286 a register first. */
8287 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8288 /* There's a general overhead of one cycle. */
8289 int cost = 1;
8290 unsigned HOST_WIDE_INT masked_const;
8291
8292 if (i & 0x80000000)
8293 i = ~i;
8294
8295 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
8296
8297 masked_const = i & 0xffff8000;
8298 if (masked_const != 0)
8299 {
8300 cost++;
8301 masked_const = i & 0xf8000000;
8302 if (masked_const != 0)
8303 cost++;
8304 }
8305 *total = COSTS_N_INSNS (cost);
8306 return false;
8307 }
8308
8309 if (mode == SImode)
8310 {
8311 *total = COSTS_N_INSNS (3);
8312 return false;
8313 }
8314
8315 /* Requires a lib call. */
8316 *total = COSTS_N_INSNS (20);
8317 return false;
8318
8319 default:
8320 return arm_rtx_costs_1 (x, outer_code, total, speed);
8321 }
8322 }
8323
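/* An illustrative sketch (hypothetical helper, unused by this file) of the
   XScale constant-multiply cycle estimate above: one cycle of general
   overhead, a second if any bits beyond the low 15 are set, and a third if
   any bits beyond the low 27 are set; constants with the top bit set are
   inverted first.  For example, 0x12345 is estimated at 2 cycles and
   0x12345678 at 3.  */
static int
xscale_const_mult_cycles_sketch (unsigned int val)
{
  int cost = 1;

  if (val & 0x80000000u)
    val = ~val;
  if ((val & 0xffff8000u) != 0)
    {
      cost++;
      if ((val & 0xf8000000u) != 0)
        cost++;
    }
  return cost;
}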
8324
8325 /* RTX costs for 9e (and later) cores. */
8326
8327 static bool
8328 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8329 int *total, bool speed)
8330 {
8331 enum machine_mode mode = GET_MODE (x);
8332
8333 if (TARGET_THUMB1)
8334 {
8335 switch (code)
8336 {
8337 case MULT:
8338 *total = COSTS_N_INSNS (3);
8339 return true;
8340
8341 default:
8342 *total = thumb1_rtx_costs (x, code, outer_code);
8343 return true;
8344 }
8345 }
8346
8347 switch (code)
8348 {
8349 case MULT:
8350 /* There is no point basing this on the tuning, since it is always the
8351 fast variant if it exists at all. */
8352 if (mode == DImode
8353 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8354 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8355 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8356 {
8357 *total = COSTS_N_INSNS (2);
8358 return false;
8359 }
8360
8361
8362 if (mode == DImode)
8363 {
8364 *total = COSTS_N_INSNS (5);
8365 return false;
8366 }
8367
8368 if (mode == SImode)
8369 {
8370 *total = COSTS_N_INSNS (2);
8371 return false;
8372 }
8373
8374 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8375 {
8376 if (TARGET_HARD_FLOAT
8377 && (mode == SFmode
8378 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8379 {
8380 *total = COSTS_N_INSNS (1);
8381 return false;
8382 }
8383 }
8384
8385 *total = COSTS_N_INSNS (20);
8386 return false;
8387
8388 default:
8389 return arm_rtx_costs_1 (x, outer_code, total, speed);
8390 }
8391 }
8392 /* All address computations that can be done are free, but rtx cost returns
8393 the same for practically all of them. So we weight the different types
8394 of address here in the order (most pref first):
8395 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
8396 static inline int
8397 arm_arm_address_cost (rtx x)
8398 {
8399 enum rtx_code c = GET_CODE (x);
8400
8401 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
8402 return 0;
8403 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
8404 return 10;
8405
8406 if (c == PLUS)
8407 {
8408 if (CONST_INT_P (XEXP (x, 1)))
8409 return 2;
8410
8411 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
8412 return 3;
8413
8414 return 4;
8415 }
8416
8417 return 6;
8418 }
8419
8420 static inline int
8421 arm_thumb_address_cost (rtx x)
8422 {
8423 enum rtx_code c = GET_CODE (x);
8424
8425 if (c == REG)
8426 return 1;
8427 if (c == PLUS
8428 && REG_P (XEXP (x, 0))
8429 && CONST_INT_P (XEXP (x, 1)))
8430 return 1;
8431
8432 return 2;
8433 }
8434
8435 static int
8436 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
8437 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
8438 {
8439 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
8440 }
8441
8442 /* Adjust cost hook for XScale. */
8443 static bool
8444 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8445 {
8446 /* Some true dependencies can have a higher cost depending
8447 on precisely how certain input operands are used. */
8448 if (REG_NOTE_KIND (link) == 0
8449 && recog_memoized (insn) >= 0
8450 && recog_memoized (dep) >= 0)
8451 {
8452 int shift_opnum = get_attr_shift (insn);
8453 enum attr_type attr_type = get_attr_type (dep);
8454
8455 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8456 operand for INSN. If we have a shifted input operand and the
8457 instruction we depend on is another ALU instruction, then we may
8458 have to account for an additional stall. */
8459 if (shift_opnum != 0
8460 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
8461 {
8462 rtx shifted_operand;
8463 int opno;
8464
8465 /* Get the shifted operand. */
8466 extract_insn (insn);
8467 shifted_operand = recog_data.operand[shift_opnum];
8468
8469 /* Iterate over all the operands in DEP. If we write an operand
8470 that overlaps with SHIFTED_OPERAND, then we have to increase the
8471 cost of this dependency. */
8472 extract_insn (dep);
8473 preprocess_constraints ();
8474 for (opno = 0; opno < recog_data.n_operands; opno++)
8475 {
8476 /* We can ignore strict inputs. */
8477 if (recog_data.operand_type[opno] == OP_IN)
8478 continue;
8479
8480 if (reg_overlap_mentioned_p (recog_data.operand[opno],
8481 shifted_operand))
8482 {
8483 *cost = 2;
8484 return false;
8485 }
8486 }
8487 }
8488 }
8489 return true;
8490 }
8491
8492 /* Adjust cost hook for Cortex A9. */
8493 static bool
8494 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8495 {
8496 switch (REG_NOTE_KIND (link))
8497 {
8498 case REG_DEP_ANTI:
8499 *cost = 0;
8500 return false;
8501
8502 case REG_DEP_TRUE:
8503 case REG_DEP_OUTPUT:
8504 if (recog_memoized (insn) >= 0
8505 && recog_memoized (dep) >= 0)
8506 {
8507 if (GET_CODE (PATTERN (insn)) == SET)
8508 {
8509 if (GET_MODE_CLASS
8510 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8511 || GET_MODE_CLASS
8512 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8513 {
8514 enum attr_type attr_type_insn = get_attr_type (insn);
8515 enum attr_type attr_type_dep = get_attr_type (dep);
8516
8517 /* By default all dependencies of the form
8518 s0 = s0 <op> s1
8519 s0 = s0 <op> s2
8520 have an extra latency of 1 cycle because
8521 of the input and output dependency in this
8522 case. However, this gets modeled as a true
8523 dependency and hence all these checks. */
8524 if (REG_P (SET_DEST (PATTERN (insn)))
8525 && REG_P (SET_DEST (PATTERN (dep)))
8526 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8527 SET_DEST (PATTERN (dep))))
8528 {
8529 /* FMACS is a special case where the dependent
8530 instruction can be issued 3 cycles before
8531 the normal latency in case of an output
8532 dependency. */
8533 if ((attr_type_insn == TYPE_FMACS
8534 || attr_type_insn == TYPE_FMACD)
8535 && (attr_type_dep == TYPE_FMACS
8536 || attr_type_dep == TYPE_FMACD))
8537 {
8538 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8539 *cost = insn_default_latency (dep) - 3;
8540 else
8541 *cost = insn_default_latency (dep);
8542 return false;
8543 }
8544 else
8545 {
8546 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8547 *cost = insn_default_latency (dep) + 1;
8548 else
8549 *cost = insn_default_latency (dep);
8550 }
8551 return false;
8552 }
8553 }
8554 }
8555 }
8556 break;
8557
8558 default:
8559 gcc_unreachable ();
8560 }
8561
8562 return true;
8563 }
8564
8565 /* Adjust cost hook for FA726TE. */
8566 static bool
8567 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8568 {
8569 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn followed by a
8570 predicated one) has a penalty of 3. */
8571 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8572 && recog_memoized (insn) >= 0
8573 && recog_memoized (dep) >= 0
8574 && get_attr_conds (dep) == CONDS_SET)
8575 {
8576 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8577 if (get_attr_conds (insn) == CONDS_USE
8578 && get_attr_type (insn) != TYPE_BRANCH)
8579 {
8580 *cost = 3;
8581 return false;
8582 }
8583
8584 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8585 || get_attr_conds (insn) == CONDS_USE)
8586 {
8587 *cost = 0;
8588 return false;
8589 }
8590 }
8591
8592 return true;
8593 }
8594
8595 /* Implement TARGET_REGISTER_MOVE_COST.
8596
8597 Moves between VFP_REGS and GENERAL_REGS are a single insn, but such
8598 a move is typically more expensive than a single memory access. We set
8599 the cost to less than two memory accesses so that floating
8600 point to integer conversion does not go through memory. */
8601
8602 int
8603 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
8604 reg_class_t from, reg_class_t to)
8605 {
8606 if (TARGET_32BIT)
8607 {
8608 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
8609 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
8610 return 15;
8611 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
8612 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
8613 return 4;
8614 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
8615 return 20;
8616 else
8617 return 2;
8618 }
8619 else
8620 {
8621 if (from == HI_REGS || to == HI_REGS)
8622 return 4;
8623 else
8624 return 2;
8625 }
8626 }
8627
8628 /* Implement TARGET_MEMORY_MOVE_COST. */
8629
8630 int
8631 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
8632 bool in ATTRIBUTE_UNUSED)
8633 {
8634 if (TARGET_32BIT)
8635 return 10;
8636 else
8637 {
8638 if (GET_MODE_SIZE (mode) < 4)
8639 return 8;
8640 else
8641 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
8642 }
8643 }
8644
8645 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8646 It corrects the value of COST based on the relationship between
8647 INSN and DEP through the dependence LINK. It returns the new
8648 value. There is a per-core adjust_cost hook to adjust scheduler costs
8649 and the per-core hook can choose to completely override the generic
8650 adjust_cost function. Only put bits of code into arm_adjust_cost that
8651 are common across all cores. */
8652 static int
8653 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8654 {
8655 rtx i_pat, d_pat;
8656
8657 /* When generating Thumb-1 code, we want to place flag-setting operations
8658 close to a conditional branch which depends on them, so that we can
8659 omit the comparison. */
8660 if (TARGET_THUMB1
8661 && REG_NOTE_KIND (link) == 0
8662 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8663 && recog_memoized (dep) >= 0
8664 && get_attr_conds (dep) == CONDS_SET)
8665 return 0;
8666
8667 if (current_tune->sched_adjust_cost != NULL)
8668 {
8669 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8670 return cost;
8671 }
8672
8673 /* XXX Is this strictly true? */
8674 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8675 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8676 return 0;
8677
8678 /* Call insns don't incur a stall, even if they follow a load. */
8679 if (REG_NOTE_KIND (link) == 0
8680 && CALL_P (insn))
8681 return 1;
8682
8683 if ((i_pat = single_set (insn)) != NULL
8684 && MEM_P (SET_SRC (i_pat))
8685 && (d_pat = single_set (dep)) != NULL
8686 && MEM_P (SET_DEST (d_pat)))
8687 {
8688 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8689 /* This is a load after a store; there is no conflict if the load reads
8690 from a cached area. Assume that loads from the stack and from the
8691 constant pool are cached, and that others will miss. This is a
8692 hack. */
8693
8694 if ((GET_CODE (src_mem) == SYMBOL_REF
8695 && CONSTANT_POOL_ADDRESS_P (src_mem))
8696 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8697 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8698 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8699 return 1;
8700 }
8701
8702 return cost;
8703 }
8704
8705 static int
8706 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
8707 {
8708 if (TARGET_32BIT)
8709 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
8710 else
8711 return (optimize > 0) ? 2 : 0;
8712 }
8713
8714 static int
8715 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
8716 {
8717 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
8718 }
8719
8720 static bool fp_consts_inited = false;
8721
8722 static REAL_VALUE_TYPE value_fp0;
8723
8724 static void
8725 init_fp_table (void)
8726 {
8727 REAL_VALUE_TYPE r;
8728
8729 r = REAL_VALUE_ATOF ("0", DFmode);
8730 value_fp0 = r;
8731 fp_consts_inited = true;
8732 }
8733
8734 /* Return TRUE if rtx X is a valid immediate FP constant. */
8735 int
8736 arm_const_double_rtx (rtx x)
8737 {
8738 REAL_VALUE_TYPE r;
8739
8740 if (!fp_consts_inited)
8741 init_fp_table ();
8742
8743 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8744 if (REAL_VALUE_MINUS_ZERO (r))
8745 return 0;
8746
8747 if (REAL_VALUES_EQUAL (r, value_fp0))
8748 return 1;
8749
8750 return 0;
8751 }
8752
8753 /* VFPv3 has a fairly wide range of representable immediates, formed from
8754 "quarter-precision" floating-point values. These can be evaluated using this
8755 formula (with ^ for exponentiation):
8756
8757 (-1)^s * n * 2^-r
8758
8759 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8760 16 <= n <= 31 and 0 <= r <= 7.
8761
8762 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8763
8764 - A (most-significant) is the sign bit.
8765 - BCD are the exponent (encoded as r XOR 3).
8766 - EFGH are the mantissa (encoded as n - 16).
8767 */
8768
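/* A worked example of the encoding above, given as an illustrative sketch
   (the helper name is made up here and nothing in this file uses it):
   1.0 is 16 * 2^-4, so s = 0, n = 16 and r = 4, which encodes as
   (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70.  */
static unsigned int
vfp3_quarter_precision_encode_sketch (int sign, int n, int r)
{
  /* Assumes sign is 0 or 1, 16 <= n <= 31 and 0 <= r <= 7, as described
     above.  */
  return (sign << 7) | ((r ^ 3) << 4) | (n - 16);
}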
8769 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8770 fconst[sd] instruction, or -1 if X isn't suitable. */
8771 static int
8772 vfp3_const_double_index (rtx x)
8773 {
8774 REAL_VALUE_TYPE r, m;
8775 int sign, exponent;
8776 unsigned HOST_WIDE_INT mantissa, mant_hi;
8777 unsigned HOST_WIDE_INT mask;
8778 HOST_WIDE_INT m1, m2;
8779 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8780
8781 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
8782 return -1;
8783
8784 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8785
8786 /* We can't represent these things, so detect them first. */
8787 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8788 return -1;
8789
8790 /* Extract sign, exponent and mantissa. */
8791 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8792 r = real_value_abs (&r);
8793 exponent = REAL_EXP (&r);
8794 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8795 highest (sign) bit, with a fixed binary point at bit point_pos.
8796 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8797 bits for the mantissa, this may fail (low bits would be lost). */
8798 real_ldexp (&m, &r, point_pos - exponent);
8799 REAL_VALUE_TO_INT (&m1, &m2, m);
8800 mantissa = m1;
8801 mant_hi = m2;
8802
8803 /* If there are bits set in the low part of the mantissa, we can't
8804 represent this value. */
8805 if (mantissa != 0)
8806 return -1;
8807
8808 /* Now make it so that mantissa contains the most-significant bits, and move
8809 the point_pos to indicate that the least-significant bits have been
8810 discarded. */
8811 point_pos -= HOST_BITS_PER_WIDE_INT;
8812 mantissa = mant_hi;
8813
8814 /* We can permit four significant bits of mantissa only, plus a high bit
8815 which is always 1. */
8816 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8817 if ((mantissa & mask) != 0)
8818 return -1;
8819
8820 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8821 mantissa >>= point_pos - 5;
8822
8823 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8824 floating-point immediate zero with Neon using an integer-zero load, but
8825 that case is handled elsewhere.) */
8826 if (mantissa == 0)
8827 return -1;
8828
8829 gcc_assert (mantissa >= 16 && mantissa <= 31);
8830
8831 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8832 normalized significands are in the range [1, 2). (Our mantissa is shifted
8833 left 4 places at this point relative to normalized IEEE754 values). GCC
8834 internally uses [0.5, 1) (see real.c), so the exponent returned from
8835 REAL_EXP must be altered. */
8836 exponent = 5 - exponent;
8837
8838 if (exponent < 0 || exponent > 7)
8839 return -1;
8840
8841 /* Sign, mantissa and exponent are now in the correct form to plug into the
8842 formula described in the comment above. */
8843 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8844 }
8845
8846 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8847 int
8848 vfp3_const_double_rtx (rtx x)
8849 {
8850 if (!TARGET_VFP3)
8851 return 0;
8852
8853 return vfp3_const_double_index (x) != -1;
8854 }
8855
8856 /* Recognize immediates which can be used in various Neon instructions. Legal
8857 immediates are described by the following table (for VMVN variants, the
8858 bitwise inverse of the constant shown is recognized. In either case, VMOV
8859 is output and the correct instruction to use for a given constant is chosen
8860 by the assembler). The constant shown is replicated across all elements of
8861 the destination vector.
8862
8863 insn elems variant constant (binary)
8864 ---- ----- ------- -----------------
8865 vmov i32 0 00000000 00000000 00000000 abcdefgh
8866 vmov i32 1 00000000 00000000 abcdefgh 00000000
8867 vmov i32 2 00000000 abcdefgh 00000000 00000000
8868 vmov i32 3 abcdefgh 00000000 00000000 00000000
8869 vmov i16 4 00000000 abcdefgh
8870 vmov i16 5 abcdefgh 00000000
8871 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8872 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8873 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8874 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8875 vmvn i16 10 00000000 abcdefgh
8876 vmvn i16 11 abcdefgh 00000000
8877 vmov i32 12 00000000 00000000 abcdefgh 11111111
8878 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8879 vmov i32 14 00000000 abcdefgh 11111111 11111111
8880 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8881 vmov i8 16 abcdefgh
8882 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8883 eeeeeeee ffffffff gggggggg hhhhhhhh
8884 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8885 vmov f32 19 00000000 00000000 00000000 00000000
8886
8887 For case 18, B = !b. Representable values are exactly those accepted by
8888 vfp3_const_double_index, but are output as floating-point numbers rather
8889 than indices.
8890
8891 For case 19, we will change it to vmov.i32 when assembling.
8892
8893 Variants 0-5 (inclusive) may also be used as immediates for the second
8894 operand of VORR/VBIC instructions.
8895
8896 The INVERSE argument causes the bitwise inverse of the given operand to be
8897 recognized instead (used for recognizing legal immediates for the VAND/VORN
8898 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8899 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8900 output, rather than the real insns vbic/vorr).
8901
8902 INVERSE makes no difference to the recognition of float vectors.
8903
8904 The return value is the variant of immediate as shown in the above table, or
8905 -1 if the given value doesn't match any of the listed patterns.
8906 */
8907 static int
8908 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8909 rtx *modconst, int *elementwidth)
8910 {
8911 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8912 matches = 1; \
8913 for (i = 0; i < idx; i += (STRIDE)) \
8914 if (!(TEST)) \
8915 matches = 0; \
8916 if (matches) \
8917 { \
8918 immtype = (CLASS); \
8919 elsize = (ELSIZE); \
8920 break; \
8921 }
8922
8923 unsigned int i, elsize = 0, idx = 0, n_elts;
8924 unsigned int innersize;
8925 unsigned char bytes[16];
8926 int immtype = -1, matches;
8927 unsigned int invmask = inverse ? 0xff : 0;
8928 bool vector = GET_CODE (op) == CONST_VECTOR;
8929
8930 if (vector)
8931 {
8932 n_elts = CONST_VECTOR_NUNITS (op);
8933 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8934 }
8935 else
8936 {
8937 n_elts = 1;
8938 if (mode == VOIDmode)
8939 mode = DImode;
8940 innersize = GET_MODE_SIZE (mode);
8941 }
8942
8943 /* Vectors of float constants. */
8944 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8945 {
8946 rtx el0 = CONST_VECTOR_ELT (op, 0);
8947 REAL_VALUE_TYPE r0;
8948
8949 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
8950 return -1;
8951
8952 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8953
8954 for (i = 1; i < n_elts; i++)
8955 {
8956 rtx elt = CONST_VECTOR_ELT (op, i);
8957 REAL_VALUE_TYPE re;
8958
8959 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8960
8961 if (!REAL_VALUES_EQUAL (r0, re))
8962 return -1;
8963 }
8964
8965 if (modconst)
8966 *modconst = CONST_VECTOR_ELT (op, 0);
8967
8968 if (elementwidth)
8969 *elementwidth = 0;
8970
8971 if (el0 == CONST0_RTX (GET_MODE (el0)))
8972 return 19;
8973 else
8974 return 18;
8975 }
8976
8977 /* Splat vector constant out into a byte vector. */
8978 for (i = 0; i < n_elts; i++)
8979 {
8980 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
8981 unsigned HOST_WIDE_INT elpart;
8982 unsigned int part, parts;
8983
8984 if (CONST_INT_P (el))
8985 {
8986 elpart = INTVAL (el);
8987 parts = 1;
8988 }
8989 else if (CONST_DOUBLE_P (el))
8990 {
8991 elpart = CONST_DOUBLE_LOW (el);
8992 parts = 2;
8993 }
8994 else
8995 gcc_unreachable ();
8996
8997 for (part = 0; part < parts; part++)
8998 {
8999 unsigned int byte;
9000 for (byte = 0; byte < innersize; byte++)
9001 {
9002 bytes[idx++] = (elpart & 0xff) ^ invmask;
9003 elpart >>= BITS_PER_UNIT;
9004 }
9005 if (CONST_DOUBLE_P (el))
9006 elpart = CONST_DOUBLE_HIGH (el);
9007 }
9008 }
9009
9010 /* Sanity check. */
9011 gcc_assert (idx == GET_MODE_SIZE (mode));
9012
9013 do
9014 {
9015 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
9016 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9017
9018 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9019 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9020
9021 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
9022 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9023
9024 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
9025 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
9026
9027 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
9028
9029 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
9030
9031 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
9032 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9033
9034 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9035 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9036
9037 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
9038 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9039
9040 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
9041 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
9042
9043 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
9044
9045 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
9046
9047 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9048 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9049
9050 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9051 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9052
9053 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
9054 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9055
9056 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
9057 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9058
9059 CHECK (1, 8, 16, bytes[i] == bytes[0]);
9060
9061 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
9062 && bytes[i] == bytes[(i + 8) % idx]);
9063 }
9064 while (0);
9065
9066 if (immtype == -1)
9067 return -1;
9068
9069 if (elementwidth)
9070 *elementwidth = elsize;
9071
9072 if (modconst)
9073 {
9074 unsigned HOST_WIDE_INT imm = 0;
9075
9076 /* Un-invert bytes of recognized vector, if necessary. */
9077 if (invmask != 0)
9078 for (i = 0; i < idx; i++)
9079 bytes[i] ^= invmask;
9080
9081 if (immtype == 17)
9082 {
9083 /* FIXME: Broken on 32-bit H_W_I hosts. */
9084 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
9085
9086 for (i = 0; i < 8; i++)
9087 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
9088 << (i * BITS_PER_UNIT);
9089
9090 *modconst = GEN_INT (imm);
9091 }
9092 else
9093 {
9094 unsigned HOST_WIDE_INT imm = 0;
9095
9096 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
9097 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
9098
9099 *modconst = GEN_INT (imm);
9100 }
9101 }
9102
9103 return immtype;
9104 #undef CHECK
9105 }
9106
9107 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
9108 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
9109 float elements), and a modified constant (whatever should be output for a
9110 VMOV) in *MODCONST. */
9111
9112 int
9113 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
9114 rtx *modconst, int *elementwidth)
9115 {
9116 rtx tmpconst;
9117 int tmpwidth;
9118 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
9119
9120 if (retval == -1)
9121 return 0;
9122
9123 if (modconst)
9124 *modconst = tmpconst;
9125
9126 if (elementwidth)
9127 *elementwidth = tmpwidth;
9128
9129 return 1;
9130 }
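
/* An illustrative sketch (register choices arbitrary): a V4SImode constant
   whose elements are all 0x000000ab splats to the byte pattern
   ab 00 00 00 repeated four times, which matches CHECK case 0 above, so
   this function returns nonzero with *ELEMENTWIDTH == 32 and a modified
   constant of 0xab; the move can then be emitted roughly as
   vmov.i32 q0, #0xab.  A word such as 0x12345678 matches none of the
   cases and has to be handled by VDUP or the constant pool instead.  */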
9131
9132 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
9133 the immediate is valid, write a constant suitable for using as an operand
9134 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
9135 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
9136
9137 int
9138 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
9139 rtx *modconst, int *elementwidth)
9140 {
9141 rtx tmpconst;
9142 int tmpwidth;
9143 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
9144
9145 if (retval < 0 || retval > 5)
9146 return 0;
9147
9148 if (modconst)
9149 *modconst = tmpconst;
9150
9151 if (elementwidth)
9152 *elementwidth = tmpwidth;
9153
9154 return 1;
9155 }
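
/* For illustration: the 0..5 window above corresponds to the immediate
   forms VORR/VBIC/VAND/VORN accept - a single non-zero byte within each
   32-bit element (cases 0-3) or within each 16-bit element (cases 4-5).
   So a vector of 0xff00 halfwords is accepted (roughly
   vorr.i16 d0, #0xff00), whereas a vector of 0xff00ffff words is rejected
   here even though it is a valid move (VMOV/VMVN) immediate.  */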
9156
9157 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
9158 the immediate is valid, write a constant suitable for using as an operand
9159 to VSHR/VSHL to *MODCONST and the corresponding element width to
9160 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
9161 because the two have different immediate-range limitations. */
9162
9163 int
9164 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
9165 rtx *modconst, int *elementwidth,
9166 bool isleftshift)
9167 {
9168 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9169 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
9170 unsigned HOST_WIDE_INT last_elt = 0;
9171 unsigned HOST_WIDE_INT maxshift;
9172
9173 /* Check that all elements of the vector constant hold the same value. */
9174 for (i = 0; i < n_elts; i++)
9175 {
9176 rtx el = CONST_VECTOR_ELT (op, i);
9177 unsigned HOST_WIDE_INT elpart;
9178
9179 if (CONST_INT_P (el))
9180 elpart = INTVAL (el);
9181 else if (CONST_DOUBLE_P (el))
9182 return 0;
9183 else
9184 gcc_unreachable ();
9185
9186 if (i != 0 && elpart != last_elt)
9187 return 0;
9188
9189 last_elt = elpart;
9190 }
9191
9192 /* Shift less than element size. */
9193 maxshift = innersize * 8;
9194
9195 if (isleftshift)
9196 {
9197 /* Left shift immediate value can be from 0 to <size>-1. */
9198 if (last_elt >= maxshift)
9199 return 0;
9200 }
9201 else
9202 {
9203 /* Right shift immediate value can be from 1 to <size>. */
9204 if (last_elt == 0 || last_elt > maxshift)
9205 return 0;
9206 }
9207
9208 if (elementwidth)
9209 *elementwidth = innersize * 8;
9210
9211 if (modconst)
9212 *modconst = CONST_VECTOR_ELT (op, 0);
9213
9214 return 1;
9215 }
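
/* A rough example of the ranges checked above: for a V4SImode operand the
   element size is 32 bits, so a VSHL immediate vector must hold the same
   value in every lane, in the range 0..31, while a VSHR immediate must be
   in the range 1..32.  A shift count of 33, or a vector whose lanes
   differ, is rejected and has to be done as a register shift instead.  */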
9216
9217 /* Return a string suitable for output of Neon immediate logic operation
9218 MNEM. */
9219
9220 char *
9221 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
9222 int inverse, int quad)
9223 {
9224 int width, is_valid;
9225 static char templ[40];
9226
9227 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
9228
9229 gcc_assert (is_valid != 0);
9230
9231 if (quad)
9232 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
9233 else
9234 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
9235
9236 return templ;
9237 }
9238
9239 /* Return a string suitable for output of Neon immediate shift operation
9240 (VSHR or VSHL) MNEM. */
9241
9242 char *
9243 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
9244 enum machine_mode mode, int quad,
9245 bool isleftshift)
9246 {
9247 int width, is_valid;
9248 static char templ[40];
9249
9250 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
9251 gcc_assert (is_valid != 0);
9252
9253 if (quad)
9254 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
9255 else
9256 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
9257
9258 return templ;
9259 }
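
/* For example (a sketch): a signed 32-bit right shift by immediate on quad
   registers comes back from this function as the template
   "vshr.s32\t%q0, %q1, %2", which the output pass turns into something
   like vshr.s32 q0, q1, #3 once the operands are substituted.  */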
9260
9261 /* Output a sequence of pairwise operations to implement a reduction.
9262 NOTE: We do "too much work" here, because pairwise operations work on two
9263 registers-worth of operands in one go. Unfortunately we don't think we can
9264 exploit those extra calculations to do the full operation in fewer steps.
9265 Although all vector elements of the result but the first are ignored, we
9266 actually calculate the same result in each of the elements. An alternative
9267 such as initially loading a vector with zero to use as each of the second
9268 operands would use up an additional register and take an extra instruction,
9269 for no particular gain. */
9270
9271 void
9272 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
9273 rtx (*reduc) (rtx, rtx, rtx))
9274 {
9275 enum machine_mode inner = GET_MODE_INNER (mode);
9276 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
9277 rtx tmpsum = op1;
9278
9279 for (i = parts / 2; i >= 1; i /= 2)
9280 {
9281 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
9282 emit_insn (reduc (dest, tmpsum, tmpsum));
9283 tmpsum = dest;
9284 }
9285 }
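
/* Worked example of the loop above: for a V4HImode operand PARTS is 4, so
   two pairwise operations are emitted (i == 2 into a fresh register, then
   i == 1 into OP0).  With a pairwise-add generator this is roughly
   vpadd.i16 d1, d0, d0 followed by vpadd.i16 d2, d1, d1 (register numbers
   arbitrary); afterwards every lane holds the full sum and the caller only
   uses lane 0.  */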
9286
9287 /* If VALS is a vector constant that can be loaded into a register
9288 using VDUP, generate instructions to do so and return an RTX to
9289 assign to the register. Otherwise return NULL_RTX. */
9290
9291 static rtx
9292 neon_vdup_constant (rtx vals)
9293 {
9294 enum machine_mode mode = GET_MODE (vals);
9295 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9296 int n_elts = GET_MODE_NUNITS (mode);
9297 bool all_same = true;
9298 rtx x;
9299 int i;
9300
9301 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
9302 return NULL_RTX;
9303
9304 for (i = 0; i < n_elts; ++i)
9305 {
9306 x = XVECEXP (vals, 0, i);
9307 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9308 all_same = false;
9309 }
9310
9311 if (!all_same)
9312 /* The elements are not all the same. We could handle repeating
9313 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
9314 {0, C, 0, C, 0, C, 0, C} which can be loaded using
9315 vdup.i16). */
9316 return NULL_RTX;
9317
9318 /* We can load this constant by using VDUP and a constant in a
9319 single ARM register. This will be cheaper than a vector
9320 load. */
9321
9322 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9323 return gen_rtx_VEC_DUPLICATE (mode, x);
9324 }
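
/* Illustrative example: for the V8QImode constant {7,7,7,7,7,7,7,7} every
   element compares equal, so the scalar 7 is forced into a core register
   and a VEC_DUPLICATE is returned, ending up roughly as
   mov r0, #7 / vdup.8 d0, r0 (register choices arbitrary).  A constant
   such as {0,7,0,7,...} returns NULL_RTX here, although, as noted above,
   it could in principle be loaded as a wider-element VDUP.  */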
9325
9326 /* Generate code to load VALS, which is a PARALLEL containing only
9327 constants (for vec_init) or CONST_VECTOR, efficiently into a
9328 register. Returns an RTX to copy into the register, or NULL_RTX
9329 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
9330
9331 rtx
9332 neon_make_constant (rtx vals)
9333 {
9334 enum machine_mode mode = GET_MODE (vals);
9335 rtx target;
9336 rtx const_vec = NULL_RTX;
9337 int n_elts = GET_MODE_NUNITS (mode);
9338 int n_const = 0;
9339 int i;
9340
9341 if (GET_CODE (vals) == CONST_VECTOR)
9342 const_vec = vals;
9343 else if (GET_CODE (vals) == PARALLEL)
9344 {
9345 /* A CONST_VECTOR must contain only CONST_INTs and
9346 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9347 Only store valid constants in a CONST_VECTOR. */
9348 for (i = 0; i < n_elts; ++i)
9349 {
9350 rtx x = XVECEXP (vals, 0, i);
9351 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
9352 n_const++;
9353 }
9354 if (n_const == n_elts)
9355 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9356 }
9357 else
9358 gcc_unreachable ();
9359
9360 if (const_vec != NULL
9361 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9362 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9363 return const_vec;
9364 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9365 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9366 pipeline cycle; creating the constant takes one or two ARM
9367 pipeline cycles. */
9368 return target;
9369 else if (const_vec != NULL_RTX)
9370 /* Load from constant pool. On Cortex-A8 this takes two cycles
9371 (for either double or quad vectors). We cannot take advantage
9372 of single-cycle VLD1 because we need a PC-relative addressing
9373 mode. */
9374 return const_vec;
9375 else
9376 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9377 We cannot construct an initializer. */
9378 return NULL_RTX;
9379 }
9380
9381 /* Initialize vector TARGET to VALS. */
9382
9383 void
9384 neon_expand_vector_init (rtx target, rtx vals)
9385 {
9386 enum machine_mode mode = GET_MODE (target);
9387 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9388 int n_elts = GET_MODE_NUNITS (mode);
9389 int n_var = 0, one_var = -1;
9390 bool all_same = true;
9391 rtx x, mem;
9392 int i;
9393
9394 for (i = 0; i < n_elts; ++i)
9395 {
9396 x = XVECEXP (vals, 0, i);
9397 if (!CONSTANT_P (x))
9398 ++n_var, one_var = i;
9399
9400 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9401 all_same = false;
9402 }
9403
9404 if (n_var == 0)
9405 {
9406 rtx constant = neon_make_constant (vals);
9407 if (constant != NULL_RTX)
9408 {
9409 emit_move_insn (target, constant);
9410 return;
9411 }
9412 }
9413
9414 /* Splat a single non-constant element if we can. */
9415 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9416 {
9417 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9418 emit_insn (gen_rtx_SET (VOIDmode, target,
9419 gen_rtx_VEC_DUPLICATE (mode, x)));
9420 return;
9421 }
9422
9423 /* One field is non-constant. Load constant then overwrite varying
9424 field. This is more efficient than using the stack. */
9425 if (n_var == 1)
9426 {
9427 rtx copy = copy_rtx (vals);
9428 rtx index = GEN_INT (one_var);
9429
9430 /* Load constant part of vector, substitute neighboring value for
9431 varying element. */
9432 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9433 neon_expand_vector_init (target, copy);
9434
9435 /* Insert variable. */
9436 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9437 switch (mode)
9438 {
9439 case V8QImode:
9440 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9441 break;
9442 case V16QImode:
9443 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9444 break;
9445 case V4HImode:
9446 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9447 break;
9448 case V8HImode:
9449 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9450 break;
9451 case V2SImode:
9452 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9453 break;
9454 case V4SImode:
9455 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9456 break;
9457 case V2SFmode:
9458 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9459 break;
9460 case V4SFmode:
9461 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9462 break;
9463 case V2DImode:
9464 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9465 break;
9466 default:
9467 gcc_unreachable ();
9468 }
9469 return;
9470 }
9471
9472 /* Construct the vector in memory one field at a time
9473 and load the whole vector. */
9474 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
9475 for (i = 0; i < n_elts; i++)
9476 emit_move_insn (adjust_address_nv (mem, inner_mode,
9477 i * GET_MODE_SIZE (inner_mode)),
9478 XVECEXP (vals, 0, i));
9479 emit_move_insn (target, mem);
9480 }
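
/* A sketch of the n_var == 1 path above: initializing a V2SImode vector
   with {x, 42} first builds the constant {42, 42} (the varying slot is
   temporarily filled with its neighbour's value), loads that into TARGET,
   and then emits a vset_lane to insert x into lane 0 - cheaper than
   pushing both elements through the stack as the fallback loop at the end
   does.  */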
9481
9482 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
9483 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9484 reported source locations are bogus. */
9485
9486 static void
9487 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9488 const char *err)
9489 {
9490 HOST_WIDE_INT lane;
9491
9492 gcc_assert (CONST_INT_P (operand));
9493
9494 lane = INTVAL (operand);
9495
9496 if (lane < low || lane >= high)
9497 error (err);
9498 }
9499
9500 /* Bounds-check lanes. */
9501
9502 void
9503 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9504 {
9505 bounds_check (operand, low, high, "lane out of range");
9506 }
9507
9508 /* Bounds-check constants. */
9509
9510 void
9511 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9512 {
9513 bounds_check (operand, low, high, "constant out of range");
9514 }
9515
9516 HOST_WIDE_INT
9517 neon_element_bits (enum machine_mode mode)
9518 {
9519 if (mode == DImode)
9520 return GET_MODE_BITSIZE (mode);
9521 else
9522 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
9523 }
9524
9525 \f
9526 /* Predicates for `match_operand' and `match_operator'. */
9527
9528 /* Return TRUE if OP is a valid coprocessor memory address pattern.
9529 WB is true if full writeback address modes are allowed and is false
9530 if limited writeback address modes (POST_INC and PRE_DEC) are
9531 allowed. */
9532
9533 int
9534 arm_coproc_mem_operand (rtx op, bool wb)
9535 {
9536 rtx ind;
9537
9538 /* Reject eliminable registers. */
9539 if (! (reload_in_progress || reload_completed)
9540 && ( reg_mentioned_p (frame_pointer_rtx, op)
9541 || reg_mentioned_p (arg_pointer_rtx, op)
9542 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9543 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9544 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9545 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9546 return FALSE;
9547
9548 /* Constants are converted into offsets from labels. */
9549 if (!MEM_P (op))
9550 return FALSE;
9551
9552 ind = XEXP (op, 0);
9553
9554 if (reload_completed
9555 && (GET_CODE (ind) == LABEL_REF
9556 || (GET_CODE (ind) == CONST
9557 && GET_CODE (XEXP (ind, 0)) == PLUS
9558 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9559 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
9560 return TRUE;
9561
9562 /* Match: (mem (reg)). */
9563 if (REG_P (ind))
9564 return arm_address_register_rtx_p (ind, 0);
9565
9566 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
9567 acceptable in any case (subject to verification by
9568 arm_address_register_rtx_p). We need WB to be true to accept
9569 PRE_INC and POST_DEC. */
9570 if (GET_CODE (ind) == POST_INC
9571 || GET_CODE (ind) == PRE_DEC
9572 || (wb
9573 && (GET_CODE (ind) == PRE_INC
9574 || GET_CODE (ind) == POST_DEC)))
9575 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9576
9577 if (wb
9578 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
9579 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
9580 && GET_CODE (XEXP (ind, 1)) == PLUS
9581 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
9582 ind = XEXP (ind, 1);
9583
9584 /* Match:
9585 (plus (reg)
9586 (const)). */
9587 if (GET_CODE (ind) == PLUS
9588 && REG_P (XEXP (ind, 0))
9589 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9590 && CONST_INT_P (XEXP (ind, 1))
9591 && INTVAL (XEXP (ind, 1)) > -1024
9592 && INTVAL (XEXP (ind, 1)) < 1024
9593 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9594 return TRUE;
9595
9596 return FALSE;
9597 }
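
/* For illustration, the addresses accepted above are roughly of the forms
   [rN], [rN, #imm] with imm a multiple of 4 in the range -1020..+1020,
   post-increment and pre-decrement, and (when WB permits full writeback)
   pre-increment, post-decrement and the {PRE,POST}_MODIFY forms.  An
   unaligned offset such as [rN, #6], or an out-of-range one such as
   [rN, #2048], is rejected.  */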
9598
9599 /* Return TRUE if OP is a memory operand which we can load or store a vector
9600 to/from. TYPE is one of the following values:
9601 0 - Vector load/store (vldr)
9602 1 - Core registers (ldm)
9603 2 - Element/structure loads (vld1)
9604 */
9605 int
9606 neon_vector_mem_operand (rtx op, int type)
9607 {
9608 rtx ind;
9609
9610 /* Reject eliminable registers. */
9611 if (! (reload_in_progress || reload_completed)
9612 && ( reg_mentioned_p (frame_pointer_rtx, op)
9613 || reg_mentioned_p (arg_pointer_rtx, op)
9614 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9615 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9616 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9617 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9618 return FALSE;
9619
9620 /* Constants are converted into offsets from labels. */
9621 if (!MEM_P (op))
9622 return FALSE;
9623
9624 ind = XEXP (op, 0);
9625
9626 if (reload_completed
9627 && (GET_CODE (ind) == LABEL_REF
9628 || (GET_CODE (ind) == CONST
9629 && GET_CODE (XEXP (ind, 0)) == PLUS
9630 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9631 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
9632 return TRUE;
9633
9634 /* Match: (mem (reg)). */
9635 if (REG_P (ind))
9636 return arm_address_register_rtx_p (ind, 0);
9637
9638 /* Allow post-increment for types 0 and 2, and pre-decrement for type 0 (vldr). */
9639 if ((type != 1 && GET_CODE (ind) == POST_INC)
9640 || (type == 0 && GET_CODE (ind) == PRE_DEC))
9641 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9642
9643 /* FIXME: vld1 allows register post-modify. */
9644
9645 /* Match:
9646 (plus (reg)
9647 (const)). */
9648 if (type == 0
9649 && GET_CODE (ind) == PLUS
9650 && REG_P (XEXP (ind, 0))
9651 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9652 && CONST_INT_P (XEXP (ind, 1))
9653 && INTVAL (XEXP (ind, 1)) > -1024
9654 /* For quad modes, we restrict the constant offset to be slightly less
9655 than what the instruction format permits. We have no such constraint
9656 on double mode offsets. (This must match arm_legitimate_index_p.) */
9657 && (INTVAL (XEXP (ind, 1))
9658 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
9659 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9660 return TRUE;
9661
9662 return FALSE;
9663 }
9664
9665 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
9666 type. */
9667 int
9668 neon_struct_mem_operand (rtx op)
9669 {
9670 rtx ind;
9671
9672 /* Reject eliminable registers. */
9673 if (! (reload_in_progress || reload_completed)
9674 && ( reg_mentioned_p (frame_pointer_rtx, op)
9675 || reg_mentioned_p (arg_pointer_rtx, op)
9676 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9677 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9678 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9679 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9680 return FALSE;
9681
9682 /* Constants are converted into offsets from labels. */
9683 if (!MEM_P (op))
9684 return FALSE;
9685
9686 ind = XEXP (op, 0);
9687
9688 if (reload_completed
9689 && (GET_CODE (ind) == LABEL_REF
9690 || (GET_CODE (ind) == CONST
9691 && GET_CODE (XEXP (ind, 0)) == PLUS
9692 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9693 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
9694 return TRUE;
9695
9696 /* Match: (mem (reg)). */
9697 if (REG_P (ind))
9698 return arm_address_register_rtx_p (ind, 0);
9699
9700 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
9701 if (GET_CODE (ind) == POST_INC
9702 || GET_CODE (ind) == PRE_DEC)
9703 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9704
9705 return FALSE;
9706 }
9707
9708 /* Return true if X is a register that will be eliminated later on. */
9709 int
9710 arm_eliminable_register (rtx x)
9711 {
9712 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9713 || REGNO (x) == ARG_POINTER_REGNUM
9714 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9715 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9716 }
9717
9718 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
9719 coprocessor registers. Otherwise return NO_REGS. */
9720
9721 enum reg_class
9722 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9723 {
9724 if (mode == HFmode)
9725 {
9726 if (!TARGET_NEON_FP16)
9727 return GENERAL_REGS;
9728 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9729 return NO_REGS;
9730 return GENERAL_REGS;
9731 }
9732
9733 /* The neon move patterns handle all legitimate vector and struct
9734 addresses. */
9735 if (TARGET_NEON
9736 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
9737 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9738 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9739 || VALID_NEON_STRUCT_MODE (mode)))
9740 return NO_REGS;
9741
9742 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9743 return NO_REGS;
9744
9745 return GENERAL_REGS;
9746 }
9747
9748 /* Values which must be returned in the most-significant end of the return
9749 register. */
9750
9751 static bool
9752 arm_return_in_msb (const_tree valtype)
9753 {
9754 return (TARGET_AAPCS_BASED
9755 && BYTES_BIG_ENDIAN
9756 && (AGGREGATE_TYPE_P (valtype)
9757 || TREE_CODE (valtype) == COMPLEX_TYPE
9758 || FIXED_POINT_TYPE_P (valtype)));
9759 }
9760
9761 /* Return TRUE if X references a SYMBOL_REF. */
9762 int
9763 symbol_mentioned_p (rtx x)
9764 {
9765 const char * fmt;
9766 int i;
9767
9768 if (GET_CODE (x) == SYMBOL_REF)
9769 return 1;
9770
9771 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9772 are constant offsets, not symbols. */
9773 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9774 return 0;
9775
9776 fmt = GET_RTX_FORMAT (GET_CODE (x));
9777
9778 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9779 {
9780 if (fmt[i] == 'E')
9781 {
9782 int j;
9783
9784 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9785 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9786 return 1;
9787 }
9788 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9789 return 1;
9790 }
9791
9792 return 0;
9793 }
9794
9795 /* Return TRUE if X references a LABEL_REF. */
9796 int
9797 label_mentioned_p (rtx x)
9798 {
9799 const char * fmt;
9800 int i;
9801
9802 if (GET_CODE (x) == LABEL_REF)
9803 return 1;
9804
9805 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9806 instruction, but they are constant offsets, not symbols. */
9807 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9808 return 0;
9809
9810 fmt = GET_RTX_FORMAT (GET_CODE (x));
9811 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9812 {
9813 if (fmt[i] == 'E')
9814 {
9815 int j;
9816
9817 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9818 if (label_mentioned_p (XVECEXP (x, i, j)))
9819 return 1;
9820 }
9821 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9822 return 1;
9823 }
9824
9825 return 0;
9826 }
9827
9828 int
9829 tls_mentioned_p (rtx x)
9830 {
9831 switch (GET_CODE (x))
9832 {
9833 case CONST:
9834 return tls_mentioned_p (XEXP (x, 0));
9835
9836 case UNSPEC:
9837 if (XINT (x, 1) == UNSPEC_TLS)
9838 return 1;
9839
9840 default:
9841 return 0;
9842 }
9843 }
9844
9845 /* Must not copy any rtx that uses a pc-relative address. */
9846
9847 static int
9848 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9849 {
9850 if (GET_CODE (*x) == UNSPEC
9851 && (XINT (*x, 1) == UNSPEC_PIC_BASE
9852 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
9853 return 1;
9854 return 0;
9855 }
9856
9857 static bool
9858 arm_cannot_copy_insn_p (rtx insn)
9859 {
9860 /* The tls call insn cannot be copied, as it is paired with a data
9861 word. */
9862 if (recog_memoized (insn) == CODE_FOR_tlscall)
9863 return true;
9864
9865 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9866 }
9867
9868 enum rtx_code
9869 minmax_code (rtx x)
9870 {
9871 enum rtx_code code = GET_CODE (x);
9872
9873 switch (code)
9874 {
9875 case SMAX:
9876 return GE;
9877 case SMIN:
9878 return LE;
9879 case UMIN:
9880 return LEU;
9881 case UMAX:
9882 return GEU;
9883 default:
9884 gcc_unreachable ();
9885 }
9886 }
9887
9888 /* Match pair of min/max operators that can be implemented via usat/ssat. */
9889
9890 bool
9891 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
9892 int *mask, bool *signed_sat)
9893 {
9894 /* The high bound must be a power of two minus one. */
9895 int log = exact_log2 (INTVAL (hi_bound) + 1);
9896 if (log == -1)
9897 return false;
9898
9899 /* The low bound is either zero (for usat) or one less than the
9900 negation of the high bound (for ssat). */
9901 if (INTVAL (lo_bound) == 0)
9902 {
9903 if (mask)
9904 *mask = log;
9905 if (signed_sat)
9906 *signed_sat = false;
9907
9908 return true;
9909 }
9910
9911 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
9912 {
9913 if (mask)
9914 *mask = log + 1;
9915 if (signed_sat)
9916 *signed_sat = true;
9917
9918 return true;
9919 }
9920
9921 return false;
9922 }
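
/* Two worked instances of the test above: bounds (0, 255) give log == 8
   and a zero low bound, so *MASK == 8 with *SIGNED_SAT == false, i.e.
   roughly usat rD, #8, rN; bounds (-128, 127) give log == 7 and
   lo == -hi - 1, so *MASK == 8 with *SIGNED_SAT == true, i.e. roughly
   ssat rD, #8, rN.  Bounds such as (0, 100) fail the power-of-two test
   and are not treated as a saturation.  */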
9923
9924 /* Return 1 if memory locations are adjacent. */
9925 int
9926 adjacent_mem_locations (rtx a, rtx b)
9927 {
9928 /* We don't guarantee to preserve the order of these memory refs. */
9929 if (volatile_refs_p (a) || volatile_refs_p (b))
9930 return 0;
9931
9932 if ((REG_P (XEXP (a, 0))
9933 || (GET_CODE (XEXP (a, 0)) == PLUS
9934 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
9935 && (REG_P (XEXP (b, 0))
9936 || (GET_CODE (XEXP (b, 0)) == PLUS
9937 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
9938 {
9939 HOST_WIDE_INT val0 = 0, val1 = 0;
9940 rtx reg0, reg1;
9941 int val_diff;
9942
9943 if (GET_CODE (XEXP (a, 0)) == PLUS)
9944 {
9945 reg0 = XEXP (XEXP (a, 0), 0);
9946 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9947 }
9948 else
9949 reg0 = XEXP (a, 0);
9950
9951 if (GET_CODE (XEXP (b, 0)) == PLUS)
9952 {
9953 reg1 = XEXP (XEXP (b, 0), 0);
9954 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9955 }
9956 else
9957 reg1 = XEXP (b, 0);
9958
9959 /* Don't accept any offset that will require multiple
9960 instructions to handle, since this would cause the
9961 arith_adjacentmem pattern to output an overlong sequence. */
9962 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9963 return 0;
9964
9965 /* Don't allow an eliminable register: register elimination can make
9966 the offset too large. */
9967 if (arm_eliminable_register (reg0))
9968 return 0;
9969
9970 val_diff = val1 - val0;
9971
9972 if (arm_ld_sched)
9973 {
9974 /* If the target has load delay slots, then there's no benefit
9975 to using an ldm instruction unless the offset is zero and
9976 we are optimizing for size. */
9977 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9978 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9979 && (val_diff == 4 || val_diff == -4));
9980 }
9981
9982 return ((REGNO (reg0) == REGNO (reg1))
9983 && (val_diff == 4 || val_diff == -4));
9984 }
9985
9986 return 0;
9987 }
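
/* For example, [r2, #4] and [r2, #8] count as adjacent (same base
   register, offsets differing by exactly 4 in either direction), whereas
   [r2, #4] with [r3, #8], or [r2, #4] with [r2, #12], do not.  On
   arm_ld_sched cores the stricter test above additionally requires
   optimize_size and one of the two offsets to be 0 or 4, since an ldm
   would otherwise be no faster than two ldr instructions.  */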
9988
9989 /* Return true if OP is a valid load or store multiple operation. LOAD is true
9990 for load operations, false for store operations. CONSECUTIVE is true
9991 if the register numbers in the operation must be consecutive in the register
9992 bank. RETURN_PC is true if the value is to be loaded into the PC.
9993 The pattern we are trying to match for load is:
9994 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
9995 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
9996 :
9997 :
9998 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
9999 ]
10000 where
10001 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
10002 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
10003 3. If consecutive is TRUE, then for kth register being loaded,
10004 REGNO (R_dk) = REGNO (R_d0) + k.
10005 The pattern for store is similar. */
10006 bool
10007 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
10008 bool consecutive, bool return_pc)
10009 {
10010 HOST_WIDE_INT count = XVECLEN (op, 0);
10011 rtx reg, mem, addr;
10012 unsigned regno;
10013 unsigned first_regno;
10014 HOST_WIDE_INT i = 1, base = 0, offset = 0;
10015 rtx elt;
10016 bool addr_reg_in_reglist = false;
10017 bool update = false;
10018 int reg_increment;
10019 int offset_adj;
10020 int regs_per_val;
10021
10022 /* If not in SImode, then registers must be consecutive
10023 (e.g., VLDM instructions for DFmode). */
10024 gcc_assert ((mode == SImode) || consecutive);
10025 /* Setting return_pc for stores is illegal. */
10026 gcc_assert (!return_pc || load);
10027
10028 /* Set up the increments and the regs per val based on the mode. */
10029 reg_increment = GET_MODE_SIZE (mode);
10030 regs_per_val = reg_increment / 4;
10031 offset_adj = return_pc ? 1 : 0;
10032
10033 if (count <= 1
10034 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
10035 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
10036 return false;
10037
10038 /* Check if this is a write-back. */
10039 elt = XVECEXP (op, 0, offset_adj);
10040 if (GET_CODE (SET_SRC (elt)) == PLUS)
10041 {
10042 i++;
10043 base = 1;
10044 update = true;
10045
10046 /* The offset adjustment must be the number of registers being
10047 popped times the size of a single register. */
10048 if (!REG_P (SET_DEST (elt))
10049 || !REG_P (XEXP (SET_SRC (elt), 0))
10050 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
10051 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
10052 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
10053 ((count - 1 - offset_adj) * reg_increment))
10054 return false;
10055 }
10056
10057 i = i + offset_adj;
10058 base = base + offset_adj;
10059 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
10060 success depends on the type: VLDM can do just one reg,
10061 LDM must do at least two. */
10062 if ((count <= i) && (mode == SImode))
10063 return false;
10064
10065 elt = XVECEXP (op, 0, i - 1);
10066 if (GET_CODE (elt) != SET)
10067 return false;
10068
10069 if (load)
10070 {
10071 reg = SET_DEST (elt);
10072 mem = SET_SRC (elt);
10073 }
10074 else
10075 {
10076 reg = SET_SRC (elt);
10077 mem = SET_DEST (elt);
10078 }
10079
10080 if (!REG_P (reg) || !MEM_P (mem))
10081 return false;
10082
10083 regno = REGNO (reg);
10084 first_regno = regno;
10085 addr = XEXP (mem, 0);
10086 if (GET_CODE (addr) == PLUS)
10087 {
10088 if (!CONST_INT_P (XEXP (addr, 1)))
10089 return false;
10090
10091 offset = INTVAL (XEXP (addr, 1));
10092 addr = XEXP (addr, 0);
10093 }
10094
10095 if (!REG_P (addr))
10096 return false;
10097
10098 /* Don't allow SP to be loaded unless it is also the base register. It
10099 guarantees that SP is reset correctly when an LDM instruction
10100 is interrupted. Otherwise, we might end up with a corrupt stack. */
10101 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10102 return false;
10103
10104 for (; i < count; i++)
10105 {
10106 elt = XVECEXP (op, 0, i);
10107 if (GET_CODE (elt) != SET)
10108 return false;
10109
10110 if (load)
10111 {
10112 reg = SET_DEST (elt);
10113 mem = SET_SRC (elt);
10114 }
10115 else
10116 {
10117 reg = SET_SRC (elt);
10118 mem = SET_DEST (elt);
10119 }
10120
10121 if (!REG_P (reg)
10122 || GET_MODE (reg) != mode
10123 || REGNO (reg) <= regno
10124 || (consecutive
10125 && (REGNO (reg) !=
10126 (unsigned int) (first_regno + regs_per_val * (i - base))))
10127 /* Don't allow SP to be loaded unless it is also the base register. It
10128 guarantees that SP is reset correctly when an LDM instruction
10129 is interrupted. Otherwise, we might end up with a corrupt stack. */
10130 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10131 || !MEM_P (mem)
10132 || GET_MODE (mem) != mode
10133 || ((GET_CODE (XEXP (mem, 0)) != PLUS
10134 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
10135 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
10136 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
10137 offset + (i - base) * reg_increment))
10138 && (!REG_P (XEXP (mem, 0))
10139 || offset + (i - base) * reg_increment != 0)))
10140 return false;
10141
10142 regno = REGNO (reg);
10143 if (regno == REGNO (addr))
10144 addr_reg_in_reglist = true;
10145 }
10146
10147 if (load)
10148 {
10149 if (update && addr_reg_in_reglist)
10150 return false;
10151
10152 /* For Thumb-1, the address register is always modified - either by
10153 write-back or by an explicit load. If the pattern does not describe an
10154 update, then the address register must be in the list of loaded registers. */
10155 if (TARGET_THUMB1)
10156 return update || addr_reg_in_reglist;
10157 }
10158
10159 return true;
10160 }
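
/* A sketch of a PARALLEL that satisfies the SImode load case above (ARM
   state, no writeback, no PC in the list):

   (parallel [(set (reg:SI 4) (mem:SI (reg:SI 0)))
   (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0) (const_int 4))))
   (set (reg:SI 6) (mem:SI (plus:SI (reg:SI 0) (const_int 8))))])

   i.e. ascending register numbers loaded from consecutive word offsets off
   a single base register - the shape of an ldmia r0, {r4, r5, r6}.  */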
10161
10162 /* Return true iff it would be profitable to turn a sequence of NOPS loads
10163 or stores (depending on IS_STORE) into a load-multiple or store-multiple
10164 instruction. ADD_OFFSET is nonzero if the base address register needs
10165 to be modified with an add instruction before we can use it. */
10166
10167 static bool
10168 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
10169 int nops, HOST_WIDE_INT add_offset)
10170 {
10171 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
10172 if the offset isn't small enough. The reason 2 ldrs are faster
10173 is because these ARMs are able to do more than one cache access
10174 in a single cycle. The ARM9 and StrongARM have Harvard caches,
10175 whilst the ARM8 has a double bandwidth cache. This means that
10176 these cores can do both an instruction fetch and a data fetch in
10177 a single cycle, so the trick of calculating the address into a
10178 scratch register (one of the result regs) and then doing a load
10179 multiple actually becomes slower (and no smaller in code size).
10180 That is the transformation
10181
10182 ldr rd1, [rbase + offset]
10183 ldr rd2, [rbase + offset + 4]
10184
10185 to
10186
10187 add rd1, rbase, offset
10188 ldmia rd1, {rd1, rd2}
10189
10190 produces worse code -- '3 cycles + any stalls on rd2' instead of
10191 '2 cycles + any stalls on rd2'. On ARMs with only one cache
10192 access per cycle, the first sequence could never complete in less
10193 than 6 cycles, whereas the ldm sequence would only take 5 and
10194 would make better use of sequential accesses if not hitting the
10195 cache.
10196
10197 We cheat here and test 'arm_ld_sched' which we currently know to
10198 only be true for the ARM8, ARM9 and StrongARM. If this ever
10199 changes, then the test below needs to be reworked. */
10200 if (nops == 2 && arm_ld_sched && add_offset != 0)
10201 return false;
10202
10203 /* XScale has load-store double instructions, but they have stricter
10204 alignment requirements than load-store multiple, so we cannot
10205 use them.
10206
10207 For XScale ldm requires 2 + NREGS cycles to complete and blocks
10208 the pipeline until completion.
10209
10210 NREGS CYCLES
10211 1 3
10212 2 4
10213 3 5
10214 4 6
10215
10216 An ldr instruction takes 1-3 cycles, but does not block the
10217 pipeline.
10218
10219 NREGS CYCLES
10220 1 1-3
10221 2 2-6
10222 3 3-9
10223 4 4-12
10224
10225 Best case ldr will always win. However, the more ldr instructions
10226 we issue, the less likely we are to be able to schedule them well.
10227 Using ldr instructions also increases code size.
10228
10229 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10230 for counts of 3 or 4 regs. */
10231 if (nops <= 2 && arm_tune_xscale && !optimize_size)
10232 return false;
10233 return true;
10234 }
10235
10236 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10237 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10238 an array ORDER which describes the sequence to use when accessing the
10239 offsets that produces an ascending order. In this sequence, each
10240 offset must be larger by exactly 4 than the previous one. ORDER[0]
10241 must have been filled in with the lowest offset by the caller.
10242 If UNSORTED_REGS is nonnull, it is an array of register numbers that
10243 we use to verify that ORDER produces an ascending order of registers.
10244 Return true if it was possible to construct such an order, false if
10245 not. */
10246
10247 static bool
10248 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
10249 int *unsorted_regs)
10250 {
10251 int i;
10252 for (i = 1; i < nops; i++)
10253 {
10254 int j;
10255
10256 order[i] = order[i - 1];
10257 for (j = 0; j < nops; j++)
10258 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
10259 {
10260 /* We must find exactly one offset that is higher than the
10261 previous one by 4. */
10262 if (order[i] != order[i - 1])
10263 return false;
10264 order[i] = j;
10265 }
10266 if (order[i] == order[i - 1])
10267 return false;
10268 /* The register numbers must be ascending. */
10269 if (unsorted_regs != NULL
10270 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
10271 return false;
10272 }
10273 return true;
10274 }
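
/* Worked example: with NOPS == 4 and UNSORTED_OFFSETS == {8, 0, 12, 4},
   the caller sets ORDER[0] = 1 (the offset 0); the loop then finds the
   offsets 4, 8 and 12 in turn, producing ORDER == {1, 3, 0, 2}.  Offset
   sets such as {0, 4, 4, 8} (a duplicate) or {0, 4, 12, 16} (a gap) make
   the function return false.  */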
10275
10276 /* Used to determine in a peephole whether a sequence of load
10277 instructions can be changed into a load-multiple instruction.
10278 NOPS is the number of separate load instructions we are examining. The
10279 first NOPS entries in OPERANDS are the destination registers, the
10280 next NOPS entries are memory operands. If this function is
10281 successful, *BASE is set to the common base register of the memory
10282 accesses; *LOAD_OFFSET is set to the first memory location's offset
10283 from that base register.
10284 REGS is an array filled in with the destination register numbers.
10285 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
10286 insn numbers to an ascending order of loads. If CHECK_REGS is true,
10287 the sequence of registers in REGS matches the loads from ascending memory
10288 locations, and the function verifies that the register numbers are
10289 themselves ascending. If CHECK_REGS is false, the register numbers
10290 are stored in the order they are found in the operands. */
10291 static int
10292 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
10293 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
10294 {
10295 int unsorted_regs[MAX_LDM_STM_OPS];
10296 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10297 int order[MAX_LDM_STM_OPS];
10298 rtx base_reg_rtx = NULL;
10299 int base_reg = -1;
10300 int i, ldm_case;
10301
10302 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10303 easily extended if required. */
10304 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10305
10306 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10307
10308 /* Loop over the operands and check that the memory references are
10309 suitable (i.e. immediate offsets from the same base register). At
10310 the same time, extract the target register, and the memory
10311 offsets. */
10312 for (i = 0; i < nops; i++)
10313 {
10314 rtx reg;
10315 rtx offset;
10316
10317 /* Convert a subreg of a mem into the mem itself. */
10318 if (GET_CODE (operands[nops + i]) == SUBREG)
10319 operands[nops + i] = alter_subreg (operands + (nops + i));
10320
10321 gcc_assert (MEM_P (operands[nops + i]));
10322
10323 /* Don't reorder volatile memory references; it doesn't seem worth
10324 looking for the case where the order is ok anyway. */
10325 if (MEM_VOLATILE_P (operands[nops + i]))
10326 return 0;
10327
10328 offset = const0_rtx;
10329
10330 if ((REG_P (reg = XEXP (operands[nops + i], 0))
10331 || (GET_CODE (reg) == SUBREG
10332 && REG_P (reg = SUBREG_REG (reg))))
10333 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10334 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
10335 || (GET_CODE (reg) == SUBREG
10336 && REG_P (reg = SUBREG_REG (reg))))
10337 && (CONST_INT_P (offset
10338 = XEXP (XEXP (operands[nops + i], 0), 1)))))
10339 {
10340 if (i == 0)
10341 {
10342 base_reg = REGNO (reg);
10343 base_reg_rtx = reg;
10344 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10345 return 0;
10346 }
10347 else if (base_reg != (int) REGNO (reg))
10348 /* Not addressed from the same base register. */
10349 return 0;
10350
10351 unsorted_regs[i] = (REG_P (operands[i])
10352 ? REGNO (operands[i])
10353 : REGNO (SUBREG_REG (operands[i])));
10354
10355 /* If it isn't an integer register, or if it overwrites the
10356 base register but isn't the last insn in the list, then
10357 we can't do this. */
10358 if (unsorted_regs[i] < 0
10359 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10360 || unsorted_regs[i] > 14
10361 || (i != nops - 1 && unsorted_regs[i] == base_reg))
10362 return 0;
10363
10364 unsorted_offsets[i] = INTVAL (offset);
10365 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10366 order[0] = i;
10367 }
10368 else
10369 /* Not a suitable memory address. */
10370 return 0;
10371 }
10372
10373 /* All the useful information has now been extracted from the
10374 operands into unsorted_regs and unsorted_offsets; additionally,
10375 order[0] has been set to the lowest offset in the list. Sort
10376 the offsets into order, verifying that they are adjacent, and
10377 check that the register numbers are ascending. */
10378 if (!compute_offset_order (nops, unsorted_offsets, order,
10379 check_regs ? unsorted_regs : NULL))
10380 return 0;
10381
10382 if (saved_order)
10383 memcpy (saved_order, order, sizeof order);
10384
10385 if (base)
10386 {
10387 *base = base_reg;
10388
10389 for (i = 0; i < nops; i++)
10390 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10391
10392 *load_offset = unsorted_offsets[order[0]];
10393 }
10394
10395 if (TARGET_THUMB1
10396 && !peep2_reg_dead_p (nops, base_reg_rtx))
10397 return 0;
10398
10399 if (unsorted_offsets[order[0]] == 0)
10400 ldm_case = 1; /* ldmia */
10401 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10402 ldm_case = 2; /* ldmib */
10403 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10404 ldm_case = 3; /* ldmda */
10405 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10406 ldm_case = 4; /* ldmdb */
10407 else if (const_ok_for_arm (unsorted_offsets[order[0]])
10408 || const_ok_for_arm (-unsorted_offsets[order[0]]))
10409 ldm_case = 5;
10410 else
10411 return 0;
10412
10413 if (!multiple_operation_profitable_p (false, nops,
10414 ldm_case == 5
10415 ? unsorted_offsets[order[0]] : 0))
10416 return 0;
10417
10418 return ldm_case;
10419 }
10420
10421 /* Used to determine in a peephole whether a sequence of store instructions can
10422 be changed into a store-multiple instruction.
10423 NOPS is the number of separate store instructions we are examining.
10424 NOPS_TOTAL is the total number of instructions recognized by the peephole
10425 pattern.
10426 The first NOPS entries in OPERANDS are the source registers, the next
10427 NOPS entries are memory operands. If this function is successful, *BASE is
10428 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10429 to the first memory location's offset from that base register. REGS is an
10430 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10431 likewise filled with the corresponding rtx's.
10432 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
10433 numbers to an ascending order of stores.
10434 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10435 from ascending memory locations, and the function verifies that the register
10436 numbers are themselves ascending. If CHECK_REGS is false, the register
10437 numbers are stored in the order they are found in the operands. */
10438 static int
10439 store_multiple_sequence (rtx *operands, int nops, int nops_total,
10440 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10441 HOST_WIDE_INT *load_offset, bool check_regs)
10442 {
10443 int unsorted_regs[MAX_LDM_STM_OPS];
10444 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10445 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10446 int order[MAX_LDM_STM_OPS];
10447 int base_reg = -1;
10448 rtx base_reg_rtx = NULL;
10449 int i, stm_case;
10450
10451 /* Write back of base register is currently only supported for Thumb 1. */
10452 int base_writeback = TARGET_THUMB1;
10453
10454 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10455 easily extended if required. */
10456 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10457
10458 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10459
10460 /* Loop over the operands and check that the memory references are
10461 suitable (i.e. immediate offsets from the same base register). At
10462 the same time, extract the target register, and the memory
10463 offsets. */
10464 for (i = 0; i < nops; i++)
10465 {
10466 rtx reg;
10467 rtx offset;
10468
10469 /* Convert a subreg of a mem into the mem itself. */
10470 if (GET_CODE (operands[nops + i]) == SUBREG)
10471 operands[nops + i] = alter_subreg (operands + (nops + i));
10472
10473 gcc_assert (MEM_P (operands[nops + i]));
10474
10475 /* Don't reorder volatile memory references; it doesn't seem worth
10476 looking for the case where the order is ok anyway. */
10477 if (MEM_VOLATILE_P (operands[nops + i]))
10478 return 0;
10479
10480 offset = const0_rtx;
10481
10482 if ((REG_P (reg = XEXP (operands[nops + i], 0))
10483 || (GET_CODE (reg) == SUBREG
10484 && REG_P (reg = SUBREG_REG (reg))))
10485 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10486 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
10487 || (GET_CODE (reg) == SUBREG
10488 && REG_P (reg = SUBREG_REG (reg))))
10489 && (CONST_INT_P (offset
10490 = XEXP (XEXP (operands[nops + i], 0), 1)))))
10491 {
10492 unsorted_reg_rtxs[i] = (REG_P (operands[i])
10493 ? operands[i] : SUBREG_REG (operands[i]));
10494 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10495
10496 if (i == 0)
10497 {
10498 base_reg = REGNO (reg);
10499 base_reg_rtx = reg;
10500 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10501 return 0;
10502 }
10503 else if (base_reg != (int) REGNO (reg))
10504 /* Not addressed from the same base register. */
10505 return 0;
10506
10507 /* If it isn't an integer register, then we can't do this. */
10508 if (unsorted_regs[i] < 0
10509 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10510 /* The effects are unpredictable if the base register is
10511 both updated and stored. */
10512 || (base_writeback && unsorted_regs[i] == base_reg)
10513 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
10514 || unsorted_regs[i] > 14)
10515 return 0;
10516
10517 unsorted_offsets[i] = INTVAL (offset);
10518 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10519 order[0] = i;
10520 }
10521 else
10522 /* Not a suitable memory address. */
10523 return 0;
10524 }
10525
10526 /* All the useful information has now been extracted from the
10527 operands into unsorted_regs and unsorted_offsets; additionally,
10528 order[0] has been set to the lowest offset in the list. Sort
10529 the offsets into order, verifying that they are adjacent, and
10530 check that the register numbers are ascending. */
10531 if (!compute_offset_order (nops, unsorted_offsets, order,
10532 check_regs ? unsorted_regs : NULL))
10533 return 0;
10534
10535 if (saved_order)
10536 memcpy (saved_order, order, sizeof order);
10537
10538 if (base)
10539 {
10540 *base = base_reg;
10541
10542 for (i = 0; i < nops; i++)
10543 {
10544 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10545 if (reg_rtxs)
10546 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
10547 }
10548
10549 *load_offset = unsorted_offsets[order[0]];
10550 }
10551
10552 if (TARGET_THUMB1
10553 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
10554 return 0;
10555
10556 if (unsorted_offsets[order[0]] == 0)
10557 stm_case = 1; /* stmia */
10558 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10559 stm_case = 2; /* stmib */
10560 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10561 stm_case = 3; /* stmda */
10562 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10563 stm_case = 4; /* stmdb */
10564 else
10565 return 0;
10566
10567 if (!multiple_operation_profitable_p (false, nops, 0))
10568 return 0;
10569
10570 return stm_case;
10571 }
10572 \f
10573 /* Routines for use in generating RTL. */
10574
10575 /* Generate a load-multiple instruction. COUNT is the number of loads in
10576 the instruction; REGS and MEMS are arrays containing the operands.
10577 BASEREG is the base register to be used in addressing the memory operands.
10578 WBACK_OFFSET is nonzero if the instruction should update the base
10579 register. */
10580
10581 static rtx
10582 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10583 HOST_WIDE_INT wback_offset)
10584 {
10585 int i = 0, j;
10586 rtx result;
10587
10588 if (!multiple_operation_profitable_p (false, count, 0))
10589 {
10590 rtx seq;
10591
10592 start_sequence ();
10593
10594 for (i = 0; i < count; i++)
10595 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
10596
10597 if (wback_offset != 0)
10598 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
10599
10600 seq = get_insns ();
10601 end_sequence ();
10602
10603 return seq;
10604 }
10605
10606 result = gen_rtx_PARALLEL (VOIDmode,
10607 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10608 if (wback_offset != 0)
10609 {
10610 XVECEXP (result, 0, 0)
10611 = gen_rtx_SET (VOIDmode, basereg,
10612 plus_constant (Pmode, basereg, wback_offset));
10613 i = 1;
10614 count++;
10615 }
10616
10617 for (j = 0; i < count; i++, j++)
10618 XVECEXP (result, 0, i)
10619 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
10620
10621 return result;
10622 }
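
/* The PARALLEL built above, for COUNT == 2, REGS == {4, 5} and a
   writeback offset of 8, looks roughly like (with "base" standing for
   BASEREG):

   (parallel [(set (reg:SI base) (plus:SI (reg:SI base) (const_int 8)))
   (set (reg:SI 4) (mem:SI ...))
   (set (reg:SI 5) (mem:SI ...))])

   which the load-multiple patterns (e.g. in ldmstm.md) match; the
   non-profitable case instead falls back to the plain sequence of single
   moves emitted at the top of the function.  */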
10623
10624 /* Generate a store-multiple instruction. COUNT is the number of stores in
10625 the instruction; REGS and MEMS are arrays containing the operands.
10626 BASEREG is the base register to be used in addressing the memory operands.
10627 WBACK_OFFSET is nonzero if the instruction should update the base
10628 register. */
10629
10630 static rtx
10631 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10632 HOST_WIDE_INT wback_offset)
10633 {
10634 int i = 0, j;
10635 rtx result;
10636
10637 if (GET_CODE (basereg) == PLUS)
10638 basereg = XEXP (basereg, 0);
10639
10640 if (!multiple_operation_profitable_p (false, count, 0))
10641 {
10642 rtx seq;
10643
10644 start_sequence ();
10645
10646 for (i = 0; i < count; i++)
10647 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
10648
10649 if (wback_offset != 0)
10650 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
10651
10652 seq = get_insns ();
10653 end_sequence ();
10654
10655 return seq;
10656 }
10657
10658 result = gen_rtx_PARALLEL (VOIDmode,
10659 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10660 if (wback_offset != 0)
10661 {
10662 XVECEXP (result, 0, 0)
10663 = gen_rtx_SET (VOIDmode, basereg,
10664 plus_constant (Pmode, basereg, wback_offset));
10665 i = 1;
10666 count++;
10667 }
10668
10669 for (j = 0; i < count; i++, j++)
10670 XVECEXP (result, 0, i)
10671 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
10672
10673 return result;
10674 }
10675
10676 /* Generate either a load-multiple or a store-multiple instruction. This
10677 function can be used in situations where we can start with a single MEM
10678 rtx and adjust its address upwards.
10679 COUNT is the number of operations in the instruction, not counting a
10680 possible update of the base register. REGS is an array containing the
10681 register operands.
10682 BASEREG is the base register to be used in addressing the memory operands,
10683 which are constructed from BASEMEM.
10684 WRITE_BACK specifies whether the generated instruction should include an
10685 update of the base register.
10686 OFFSETP is used to pass an offset to and from this function; this offset
10687 is not used when constructing the address (instead BASEMEM should have an
10688 appropriate offset in its address), it is used only for setting
10689 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
10690
10691 static rtx
10692 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
10693 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10694 {
10695 rtx mems[MAX_LDM_STM_OPS];
10696 HOST_WIDE_INT offset = *offsetp;
10697 int i;
10698
10699 gcc_assert (count <= MAX_LDM_STM_OPS);
10700
10701 if (GET_CODE (basereg) == PLUS)
10702 basereg = XEXP (basereg, 0);
10703
10704 for (i = 0; i < count; i++)
10705 {
10706 rtx addr = plus_constant (Pmode, basereg, i * 4);
10707 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10708 offset += 4;
10709 }
10710
10711 if (write_back)
10712 *offsetp = offset;
10713
10714 if (is_load)
10715 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10716 write_back ? 4 * count : 0);
10717 else
10718 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10719 write_back ? 4 * count : 0);
10720 }
10721
10722 rtx
10723 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10724 rtx basemem, HOST_WIDE_INT *offsetp)
10725 {
10726 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10727 offsetp);
10728 }
10729
10730 rtx
10731 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10732 rtx basemem, HOST_WIDE_INT *offsetp)
10733 {
10734 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10735 offsetp);
10736 }
10737
10738 /* Called from a peephole2 expander to turn a sequence of loads into an
10739 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10740 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10741 is true if we can reorder the registers because they are used commutatively
10742 subsequently.
10743 Returns true iff we could generate a new instruction. */
10744
10745 bool
10746 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10747 {
10748 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10749 rtx mems[MAX_LDM_STM_OPS];
10750 int i, j, base_reg;
10751 rtx base_reg_rtx;
10752 HOST_WIDE_INT offset;
10753 int write_back = FALSE;
10754 int ldm_case;
10755 rtx addr;
10756
10757 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10758 &base_reg, &offset, !sort_regs);
10759
10760 if (ldm_case == 0)
10761 return false;
10762
10763 if (sort_regs)
10764 for (i = 0; i < nops - 1; i++)
10765 for (j = i + 1; j < nops; j++)
10766 if (regs[i] > regs[j])
10767 {
10768 int t = regs[i];
10769 regs[i] = regs[j];
10770 regs[j] = t;
10771 }
10772 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10773
10774 if (TARGET_THUMB1)
10775 {
10776 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10777 gcc_assert (ldm_case == 1 || ldm_case == 5);
10778 write_back = TRUE;
10779 }
10780
10781 if (ldm_case == 5)
10782 {
10783 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10784 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10785 offset = 0;
10786 if (!TARGET_THUMB1)
10787 {
10788 base_reg = regs[0];
10789 base_reg_rtx = newbase;
10790 }
10791 }
10792
10793 for (i = 0; i < nops; i++)
10794 {
10795 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
10796 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10797 SImode, addr, 0);
10798 }
10799 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10800 write_back ? offset + i * 4 : 0));
10801 return true;
10802 }
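
/* A sketch of the effect: for a matched pair such as

   ldr r4, [r0, #8]
   ldr r5, [r0, #12]

   load_multiple_sequence returns ldm_case 5, so (outside Thumb-1) the code
   above first emits add r4, r0, #8 and then an ldmia r4, {r4, r5},
   reusing the first destination register as the rebased pointer so that no
   extra scratch register is needed.  */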
10803
10804 /* Called from a peephole2 expander to turn a sequence of stores into an
10805 STM instruction. OPERANDS are the operands found by the peephole matcher;
10806 NOPS indicates how many separate stores we are trying to combine.
10807 Returns true iff we could generate a new instruction. */
10808
10809 bool
10810 gen_stm_seq (rtx *operands, int nops)
10811 {
10812 int i;
10813 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10814 rtx mems[MAX_LDM_STM_OPS];
10815 int base_reg;
10816 rtx base_reg_rtx;
10817 HOST_WIDE_INT offset;
10818 int write_back = FALSE;
10819 int stm_case;
10820 rtx addr;
10821 bool base_reg_dies;
10822
10823 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10824 mem_order, &base_reg, &offset, true);
10825
10826 if (stm_case == 0)
10827 return false;
10828
10829 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10830
10831 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10832 if (TARGET_THUMB1)
10833 {
10834 gcc_assert (base_reg_dies);
10835 write_back = TRUE;
10836 }
10837
10838 if (stm_case == 5)
10839 {
10840 gcc_assert (base_reg_dies);
10841 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10842 offset = 0;
10843 }
10844
10845 addr = plus_constant (Pmode, base_reg_rtx, offset);
10846
10847 for (i = 0; i < nops; i++)
10848 {
10849 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
10850 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10851 SImode, addr, 0);
10852 }
10853 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10854 write_back ? offset + i * 4 : 0));
10855 return true;
10856 }
10857
10858 /* Called from a peephole2 expander to turn a sequence of stores that are
10859 preceded by constant loads into an STM instruction. OPERANDS are the
10860 operands found by the peephole matcher; NOPS indicates how many
10861 separate stores we are trying to combine; there are 2 * NOPS
10862 instructions in the peephole.
10863 Returns true iff we could generate a new instruction. */
10864
10865 bool
10866 gen_const_stm_seq (rtx *operands, int nops)
10867 {
10868 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10869 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10870 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10871 rtx mems[MAX_LDM_STM_OPS];
10872 int base_reg;
10873 rtx base_reg_rtx;
10874 HOST_WIDE_INT offset;
10875 int write_back = FALSE;
10876 int stm_case;
10877 rtx addr;
10878 bool base_reg_dies;
10879 int i, j;
10880 HARD_REG_SET allocated;
10881
10882 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10883 mem_order, &base_reg, &offset, false);
10884
10885 if (stm_case == 0)
10886 return false;
10887
10888 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10889
10890 /* If the same register is used more than once, try to find a free
10891 register. */
10892 CLEAR_HARD_REG_SET (allocated);
10893 for (i = 0; i < nops; i++)
10894 {
10895 for (j = i + 1; j < nops; j++)
10896 if (regs[i] == regs[j])
10897 {
10898 rtx t = peep2_find_free_register (0, nops * 2,
10899 TARGET_THUMB1 ? "l" : "r",
10900 SImode, &allocated);
10901 if (t == NULL_RTX)
10902 return false;
10903 reg_rtxs[i] = t;
10904 regs[i] = REGNO (t);
10905 }
10906 }
10907
10908 /* Compute an ordering that maps the register numbers to an ascending
10909 sequence. */
10910 reg_order[0] = 0;
10911 for (i = 0; i < nops; i++)
10912 if (regs[i] < regs[reg_order[0]])
10913 reg_order[0] = i;
10914
10915 for (i = 1; i < nops; i++)
10916 {
10917 int this_order = reg_order[i - 1];
10918 for (j = 0; j < nops; j++)
10919 if (regs[j] > regs[reg_order[i - 1]]
10920 && (this_order == reg_order[i - 1]
10921 || regs[j] < regs[this_order]))
10922 this_order = j;
10923 reg_order[i] = this_order;
10924 }
10925
10926 /* Ensure that registers that must be live after the instruction end
10927 up with the correct value. */
10928 for (i = 0; i < nops; i++)
10929 {
10930 int this_order = reg_order[i];
10931 if ((this_order != mem_order[i]
10932 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10933 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10934 return false;
10935 }
10936
10937 /* Load the constants. */
10938 for (i = 0; i < nops; i++)
10939 {
10940 rtx op = operands[2 * nops + mem_order[i]];
10941 sorted_regs[i] = regs[reg_order[i]];
10942 emit_move_insn (reg_rtxs[reg_order[i]], op);
10943 }
10944
10945 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10946
10947 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10948 if (TARGET_THUMB1)
10949 {
10950 gcc_assert (base_reg_dies);
10951 write_back = TRUE;
10952 }
10953
10954 if (stm_case == 5)
10955 {
10956 gcc_assert (base_reg_dies);
10957 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10958 offset = 0;
10959 }
10960
10961 addr = plus_constant (Pmode, base_reg_rtx, offset);
10962
10963 for (i = 0; i < nops; i++)
10964 {
10965 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
10966 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10967 SImode, addr, 0);
10968 }
10969 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10970 write_back ? offset + i * 4 : 0));
10971 return true;
10972 }
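/* Illustrative sketch (not from the original sources): given the matched
   sequence "mov r3, #0; str r3, [r4]; mov r3, #1; str r3, [r4, #4]" with r3
   dead afterwards, the code above allocates a second free register (call it
   r5, hypothetical -- whatever peep2_find_free_register returns), reloads the
   constants in memory order and emits
   "mov r3, #0; mov r5, #1; stmia r4, {r3, r5}".  */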
10973
10974 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
10975 unaligned copies on processors which support unaligned semantics for those
10976 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
10977 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
10978 An interleave factor of 1 (the minimum) will perform no interleaving.
10979 Load/store multiple are used for aligned addresses where possible. */
10980
10981 static void
10982 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
10983 HOST_WIDE_INT length,
10984 unsigned int interleave_factor)
10985 {
10986 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
10987 int *regnos = XALLOCAVEC (int, interleave_factor);
10988 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
10989 HOST_WIDE_INT i, j;
10990 HOST_WIDE_INT remaining = length, words;
10991 rtx halfword_tmp = NULL, byte_tmp = NULL;
10992 rtx dst, src;
10993 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
10994 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
10995 HOST_WIDE_INT srcoffset, dstoffset;
10996 HOST_WIDE_INT src_autoinc, dst_autoinc;
10997 rtx mem, addr;
10998
10999 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
11000
11001 /* Use hard registers if we have aligned source or destination so we can use
11002 load/store multiple with contiguous registers. */
11003 if (dst_aligned || src_aligned)
11004 for (i = 0; i < interleave_factor; i++)
11005 regs[i] = gen_rtx_REG (SImode, i);
11006 else
11007 for (i = 0; i < interleave_factor; i++)
11008 regs[i] = gen_reg_rtx (SImode);
11009
11010 dst = copy_addr_to_reg (XEXP (dstbase, 0));
11011 src = copy_addr_to_reg (XEXP (srcbase, 0));
11012
11013 srcoffset = dstoffset = 0;
11014
11015 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
11016 For copying the last bytes we want to subtract this offset again. */
11017 src_autoinc = dst_autoinc = 0;
11018
11019 for (i = 0; i < interleave_factor; i++)
11020 regnos[i] = i;
11021
11022 /* Copy BLOCK_SIZE_BYTES chunks. */
11023
11024 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
11025 {
11026 /* Load words. */
11027 if (src_aligned && interleave_factor > 1)
11028 {
11029 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
11030 TRUE, srcbase, &srcoffset));
11031 src_autoinc += UNITS_PER_WORD * interleave_factor;
11032 }
11033 else
11034 {
11035 for (j = 0; j < interleave_factor; j++)
11036 {
11037 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
11038 - src_autoinc));
11039 mem = adjust_automodify_address (srcbase, SImode, addr,
11040 srcoffset + j * UNITS_PER_WORD);
11041 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11042 }
11043 srcoffset += block_size_bytes;
11044 }
11045
11046 /* Store words. */
11047 if (dst_aligned && interleave_factor > 1)
11048 {
11049 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
11050 TRUE, dstbase, &dstoffset));
11051 dst_autoinc += UNITS_PER_WORD * interleave_factor;
11052 }
11053 else
11054 {
11055 for (j = 0; j < interleave_factor; j++)
11056 {
11057 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
11058 - dst_autoinc));
11059 mem = adjust_automodify_address (dstbase, SImode, addr,
11060 dstoffset + j * UNITS_PER_WORD);
11061 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11062 }
11063 dstoffset += block_size_bytes;
11064 }
11065
11066 remaining -= block_size_bytes;
11067 }
11068
11069 /* Copy any whole words left (note these aren't interleaved with any
11070 subsequent halfword/byte load/stores in the interests of simplicity). */
11071
11072 words = remaining / UNITS_PER_WORD;
11073
11074 gcc_assert (words < interleave_factor);
11075
11076 if (src_aligned && words > 1)
11077 {
11078 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
11079 &srcoffset));
11080 src_autoinc += UNITS_PER_WORD * words;
11081 }
11082 else
11083 {
11084 for (j = 0; j < words; j++)
11085 {
11086 addr = plus_constant (Pmode, src,
11087 srcoffset + j * UNITS_PER_WORD - src_autoinc);
11088 mem = adjust_automodify_address (srcbase, SImode, addr,
11089 srcoffset + j * UNITS_PER_WORD);
11090 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11091 }
11092 srcoffset += words * UNITS_PER_WORD;
11093 }
11094
11095 if (dst_aligned && words > 1)
11096 {
11097 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
11098 &dstoffset));
11099 dst_autoinc += words * UNITS_PER_WORD;
11100 }
11101 else
11102 {
11103 for (j = 0; j < words; j++)
11104 {
11105 addr = plus_constant (Pmode, dst,
11106 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
11107 mem = adjust_automodify_address (dstbase, SImode, addr,
11108 dstoffset + j * UNITS_PER_WORD);
11109 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11110 }
11111 dstoffset += words * UNITS_PER_WORD;
11112 }
11113
11114 remaining -= words * UNITS_PER_WORD;
11115
11116 gcc_assert (remaining < 4);
11117
11118 /* Copy a halfword if necessary. */
11119
11120 if (remaining >= 2)
11121 {
11122 halfword_tmp = gen_reg_rtx (SImode);
11123
11124 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11125 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
11126 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
11127
11128 /* Either write out immediately, or delay until we've loaded the last
11129 byte, depending on interleave factor. */
11130 if (interleave_factor == 1)
11131 {
11132 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11133 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11134 emit_insn (gen_unaligned_storehi (mem,
11135 gen_lowpart (HImode, halfword_tmp)));
11136 halfword_tmp = NULL;
11137 dstoffset += 2;
11138 }
11139
11140 remaining -= 2;
11141 srcoffset += 2;
11142 }
11143
11144 gcc_assert (remaining < 2);
11145
11146 /* Copy last byte. */
11147
11148 if ((remaining & 1) != 0)
11149 {
11150 byte_tmp = gen_reg_rtx (SImode);
11151
11152 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11153 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
11154 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
11155
11156 if (interleave_factor == 1)
11157 {
11158 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11159 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11160 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11161 byte_tmp = NULL;
11162 dstoffset++;
11163 }
11164
11165 remaining--;
11166 srcoffset++;
11167 }
11168
11169 /* Store last halfword if we haven't done so already. */
11170
11171 if (halfword_tmp)
11172 {
11173 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11174 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11175 emit_insn (gen_unaligned_storehi (mem,
11176 gen_lowpart (HImode, halfword_tmp)));
11177 dstoffset += 2;
11178 }
11179
11180 /* Likewise for last byte. */
11181
11182 if (byte_tmp)
11183 {
11184 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11185 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11186 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11187 dstoffset++;
11188 }
11189
11190 gcc_assert (remaining == 0 && srcoffset == dstoffset);
11191 }
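/* Illustrative shape of the code emitted above for a 10-byte copy with
   INTERLEAVE_FACTOR 1 and both operands unaligned: two unaligned word
   load/store pairs for bytes 0-7, one unaligned halfword load/store for
   bytes 8-9, and no trailing byte copy.  */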
11192
11193 /* From mips_adjust_block_mem:
11194
11195 Helper function for doing a loop-based block operation on memory
11196 reference MEM. Each iteration of the loop will operate on LENGTH
11197 bytes of MEM.
11198
11199 Create a new base register for use within the loop and point it to
11200 the start of MEM. Create a new memory reference that uses this
11201 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
11202
11203 static void
11204 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
11205 rtx *loop_mem)
11206 {
11207 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
11208
11209 /* Although the new mem does not refer to a known location,
11210 it does keep up to LENGTH bytes of alignment. */
11211 *loop_mem = change_address (mem, BLKmode, *loop_reg);
11212 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
11213 }
11214
11215 /* From mips_block_move_loop:
11216
11217 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
11218 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
11219 the memory regions do not overlap. */
11220
11221 static void
11222 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
11223 unsigned int interleave_factor,
11224 HOST_WIDE_INT bytes_per_iter)
11225 {
11226 rtx label, src_reg, dest_reg, final_src, test;
11227 HOST_WIDE_INT leftover;
11228
11229 leftover = length % bytes_per_iter;
11230 length -= leftover;
11231
11232 /* Create registers and memory references for use within the loop. */
11233 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
11234 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
11235
11236 /* Calculate the value that SRC_REG should have after the last iteration of
11237 the loop. */
11238 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
11239 0, 0, OPTAB_WIDEN);
11240
11241 /* Emit the start of the loop. */
11242 label = gen_label_rtx ();
11243 emit_label (label);
11244
11245 /* Emit the loop body. */
11246 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
11247 interleave_factor);
11248
11249 /* Move on to the next block. */
11250 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
11251 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
11252
11253 /* Emit the loop condition. */
11254 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
11255 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
11256
11257 /* Mop up any left-over bytes. */
11258 if (leftover)
11259 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
11260 }
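/* Worked example (illustrative only): for LENGTH == 40 and BYTES_PER_ITER ==
   16, the loop above copies 32 bytes in two iterations (SRC_REG and DEST_REG
   advance by 16 each time until SRC_REG reaches FINAL_SRC), and the remaining
   8 bytes are handled by the trailing arm_block_move_unaligned_straight
   call.  */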
11261
11262 /* Emit a block move when either the source or destination is unaligned (not
11263 aligned to a four-byte boundary). This may need further tuning depending on
11264 core type, optimize_size setting, etc. */
11265
11266 static int
11267 arm_movmemqi_unaligned (rtx *operands)
11268 {
11269 HOST_WIDE_INT length = INTVAL (operands[2]);
11270
11271 if (optimize_size)
11272 {
11273 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
11274 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
11275 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
11276 size of code if optimizing for size. We'll use ldm/stm if src_aligned
11277 or dst_aligned though: allow more interleaving in those cases since the
11278 resulting code can be smaller. */
11279 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
11280 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
11281
11282 if (length > 12)
11283 arm_block_move_unaligned_loop (operands[0], operands[1], length,
11284 interleave_factor, bytes_per_iter);
11285 else
11286 arm_block_move_unaligned_straight (operands[0], operands[1], length,
11287 interleave_factor);
11288 }
11289 else
11290 {
11291 /* Note that the loop created by arm_block_move_unaligned_loop may be
11292 subject to loop unrolling, which makes tuning this condition a little
11293 redundant. */
11294 if (length > 32)
11295 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
11296 else
11297 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
11298 }
11299
11300 return 1;
11301 }
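/* For instance (illustrative only): when optimizing for size with neither
   operand word-aligned, a 20-byte copy takes the loop path above
   (INTERLEAVE_FACTOR 1, BYTES_PER_ITER 4, since 20 > 12), whereas a 12-byte
   copy is expanded straight-line.  Without -Os the cut-over is at 32 bytes
   and an interleave factor of 4 is used.  */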
11302
11303 int
11304 arm_gen_movmemqi (rtx *operands)
11305 {
11306 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
11307 HOST_WIDE_INT srcoffset, dstoffset;
11308 int i;
11309 rtx src, dst, srcbase, dstbase;
11310 rtx part_bytes_reg = NULL;
11311 rtx mem;
11312
11313 if (!CONST_INT_P (operands[2])
11314 || !CONST_INT_P (operands[3])
11315 || INTVAL (operands[2]) > 64)
11316 return 0;
11317
11318 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
11319 return arm_movmemqi_unaligned (operands);
11320
11321 if (INTVAL (operands[3]) & 3)
11322 return 0;
11323
11324 dstbase = operands[0];
11325 srcbase = operands[1];
11326
11327 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
11328 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
11329
11330 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
11331 out_words_to_go = INTVAL (operands[2]) / 4;
11332 last_bytes = INTVAL (operands[2]) & 3;
11333 dstoffset = srcoffset = 0;
11334
11335 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
11336 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
11337
11338 for (i = 0; in_words_to_go >= 2; i += 4)
11339 {
11340 if (in_words_to_go > 4)
11341 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
11342 TRUE, srcbase, &srcoffset));
11343 else
11344 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
11345 src, FALSE, srcbase,
11346 &srcoffset));
11347
11348 if (out_words_to_go)
11349 {
11350 if (out_words_to_go > 4)
11351 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
11352 TRUE, dstbase, &dstoffset));
11353 else if (out_words_to_go != 1)
11354 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
11355 out_words_to_go, dst,
11356 (last_bytes == 0
11357 ? FALSE : TRUE),
11358 dstbase, &dstoffset));
11359 else
11360 {
11361 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11362 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
11363 if (last_bytes != 0)
11364 {
11365 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
11366 dstoffset += 4;
11367 }
11368 }
11369 }
11370
11371 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
11372 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
11373 }
11374
11375 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
11376 if (out_words_to_go)
11377 {
11378 rtx sreg;
11379
11380 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11381 sreg = copy_to_reg (mem);
11382
11383 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11384 emit_move_insn (mem, sreg);
11385 in_words_to_go--;
11386
11387 gcc_assert (!in_words_to_go); /* Sanity check */
11388 }
11389
11390 if (in_words_to_go)
11391 {
11392 gcc_assert (in_words_to_go > 0);
11393
11394 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11395 part_bytes_reg = copy_to_mode_reg (SImode, mem);
11396 }
11397
11398 gcc_assert (!last_bytes || part_bytes_reg);
11399
11400 if (BYTES_BIG_ENDIAN && last_bytes)
11401 {
11402 rtx tmp = gen_reg_rtx (SImode);
11403
11404 /* The bytes we want are in the top end of the word. */
11405 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
11406 GEN_INT (8 * (4 - last_bytes))));
11407 part_bytes_reg = tmp;
11408
11409 while (last_bytes)
11410 {
11411 mem = adjust_automodify_address (dstbase, QImode,
11412 plus_constant (Pmode, dst,
11413 last_bytes - 1),
11414 dstoffset + last_bytes - 1);
11415 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11416
11417 if (--last_bytes)
11418 {
11419 tmp = gen_reg_rtx (SImode);
11420 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
11421 part_bytes_reg = tmp;
11422 }
11423 }
11424
11425 }
11426 else
11427 {
11428 if (last_bytes > 1)
11429 {
11430 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
11431 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
11432 last_bytes -= 2;
11433 if (last_bytes)
11434 {
11435 rtx tmp = gen_reg_rtx (SImode);
11436 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
11437 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
11438 part_bytes_reg = tmp;
11439 dstoffset += 2;
11440 }
11441 }
11442
11443 if (last_bytes)
11444 {
11445 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
11446 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11447 }
11448 }
11449
11450 return 1;
11451 }
11452
11453 /* Select a dominance comparison mode if possible for a test of the general
11454 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
11455 COND_OR == DOM_CC_X_AND_Y => (X && Y)
11456 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
11457 COND_OR == DOM_CC_X_OR_Y => (X || Y)
11458 In all cases OP will be either EQ or NE, but we don't need to know which
11459 here. If we are unable to support a dominance comparison we return
11460 CC mode. This will then fail to match for the RTL expressions that
11461 generate this call. */
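/* A concrete (illustrative) example: for X = (eq a b), Y = (ge a b) and
   COND_OR == DOM_CC_X_OR_Y, i.e. roughly "a == b || a >= b", the EQ result is
   implied by GE, so the function below returns CC_DGEmode and a single
   combined comparison suffices.  */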
11462 enum machine_mode
11463 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
11464 {
11465 enum rtx_code cond1, cond2;
11466 int swapped = 0;
11467
11468 /* Currently we will probably get the wrong result if the individual
11469 comparisons are not simple. This also ensures that it is safe to
11470 reverse a comparison if necessary. */
11471 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
11472 != CCmode)
11473 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
11474 != CCmode))
11475 return CCmode;
11476
11477 /* The if_then_else variant of this tests the second condition if the
11478 first passes, but is true if the first fails. Reverse the first
11479 condition to get a true "inclusive-or" expression. */
11480 if (cond_or == DOM_CC_NX_OR_Y)
11481 cond1 = reverse_condition (cond1);
11482
11483 /* If the comparisons are not equal, and one doesn't dominate the other,
11484 then we can't do this. */
11485 if (cond1 != cond2
11486 && !comparison_dominates_p (cond1, cond2)
11487 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
11488 return CCmode;
11489
11490 if (swapped)
11491 {
11492 enum rtx_code temp = cond1;
11493 cond1 = cond2;
11494 cond2 = temp;
11495 }
11496
11497 switch (cond1)
11498 {
11499 case EQ:
11500 if (cond_or == DOM_CC_X_AND_Y)
11501 return CC_DEQmode;
11502
11503 switch (cond2)
11504 {
11505 case EQ: return CC_DEQmode;
11506 case LE: return CC_DLEmode;
11507 case LEU: return CC_DLEUmode;
11508 case GE: return CC_DGEmode;
11509 case GEU: return CC_DGEUmode;
11510 default: gcc_unreachable ();
11511 }
11512
11513 case LT:
11514 if (cond_or == DOM_CC_X_AND_Y)
11515 return CC_DLTmode;
11516
11517 switch (cond2)
11518 {
11519 case LT:
11520 return CC_DLTmode;
11521 case LE:
11522 return CC_DLEmode;
11523 case NE:
11524 return CC_DNEmode;
11525 default:
11526 gcc_unreachable ();
11527 }
11528
11529 case GT:
11530 if (cond_or == DOM_CC_X_AND_Y)
11531 return CC_DGTmode;
11532
11533 switch (cond2)
11534 {
11535 case GT:
11536 return CC_DGTmode;
11537 case GE:
11538 return CC_DGEmode;
11539 case NE:
11540 return CC_DNEmode;
11541 default:
11542 gcc_unreachable ();
11543 }
11544
11545 case LTU:
11546 if (cond_or == DOM_CC_X_AND_Y)
11547 return CC_DLTUmode;
11548
11549 switch (cond2)
11550 {
11551 case LTU:
11552 return CC_DLTUmode;
11553 case LEU:
11554 return CC_DLEUmode;
11555 case NE:
11556 return CC_DNEmode;
11557 default:
11558 gcc_unreachable ();
11559 }
11560
11561 case GTU:
11562 if (cond_or == DOM_CC_X_AND_Y)
11563 return CC_DGTUmode;
11564
11565 switch (cond2)
11566 {
11567 case GTU:
11568 return CC_DGTUmode;
11569 case GEU:
11570 return CC_DGEUmode;
11571 case NE:
11572 return CC_DNEmode;
11573 default:
11574 gcc_unreachable ();
11575 }
11576
11577 /* The remaining cases only occur when both comparisons are the
11578 same. */
11579 case NE:
11580 gcc_assert (cond1 == cond2);
11581 return CC_DNEmode;
11582
11583 case LE:
11584 gcc_assert (cond1 == cond2);
11585 return CC_DLEmode;
11586
11587 case GE:
11588 gcc_assert (cond1 == cond2);
11589 return CC_DGEmode;
11590
11591 case LEU:
11592 gcc_assert (cond1 == cond2);
11593 return CC_DLEUmode;
11594
11595 case GEU:
11596 gcc_assert (cond1 == cond2);
11597 return CC_DGEUmode;
11598
11599 default:
11600 gcc_unreachable ();
11601 }
11602 }
11603
11604 enum machine_mode
11605 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
11606 {
11607 /* All floating point compares return CCFP if it is an equality
11608 comparison, and CCFPE otherwise. */
11609 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11610 {
11611 switch (op)
11612 {
11613 case EQ:
11614 case NE:
11615 case UNORDERED:
11616 case ORDERED:
11617 case UNLT:
11618 case UNLE:
11619 case UNGT:
11620 case UNGE:
11621 case UNEQ:
11622 case LTGT:
11623 return CCFPmode;
11624
11625 case LT:
11626 case LE:
11627 case GT:
11628 case GE:
11629 return CCFPEmode;
11630
11631 default:
11632 gcc_unreachable ();
11633 }
11634 }
11635
11636 /* A compare with a shifted operand. Because of canonicalization, the
11637 comparison will have to be swapped when we emit the assembler. */
11638 if (GET_MODE (y) == SImode
11639 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11640 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11641 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
11642 || GET_CODE (x) == ROTATERT))
11643 return CC_SWPmode;
11644
11645 /* This operation is performed swapped, but since we only rely on the Z
11646 flag we don't need an additional mode. */
11647 if (GET_MODE (y) == SImode
11648 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11649 && GET_CODE (x) == NEG
11650 && (op == EQ || op == NE))
11651 return CC_Zmode;
11652
11653 /* This is a special case that is used by combine to allow a
11654 comparison of a shifted byte load to be split into a zero-extend
11655 followed by a comparison of the shifted integer (only valid for
11656 equalities and unsigned inequalities). */
11657 if (GET_MODE (x) == SImode
11658 && GET_CODE (x) == ASHIFT
11659 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
11660 && GET_CODE (XEXP (x, 0)) == SUBREG
11661 && MEM_P (SUBREG_REG (XEXP (x, 0)))
11662 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
11663 && (op == EQ || op == NE
11664 || op == GEU || op == GTU || op == LTU || op == LEU)
11665 && CONST_INT_P (y))
11666 return CC_Zmode;
11667
11668 /* A construct for a conditional compare: if the false arm contains
11669 0, then both conditions must be true; otherwise either condition
11670 must be true. Not all conditions are possible, so CCmode is
11671 returned if it can't be done. */
11672 if (GET_CODE (x) == IF_THEN_ELSE
11673 && (XEXP (x, 2) == const0_rtx
11674 || XEXP (x, 2) == const1_rtx)
11675 && COMPARISON_P (XEXP (x, 0))
11676 && COMPARISON_P (XEXP (x, 1)))
11677 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11678 INTVAL (XEXP (x, 2)));
11679
11680 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
11681 if (GET_CODE (x) == AND
11682 && (op == EQ || op == NE)
11683 && COMPARISON_P (XEXP (x, 0))
11684 && COMPARISON_P (XEXP (x, 1)))
11685 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11686 DOM_CC_X_AND_Y);
11687
11688 if (GET_CODE (x) == IOR
11689 && (op == EQ || op == NE)
11690 && COMPARISON_P (XEXP (x, 0))
11691 && COMPARISON_P (XEXP (x, 1)))
11692 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11693 DOM_CC_X_OR_Y);
11694
11695 /* An operation (on Thumb) where we want to test for a single bit.
11696 This is done by shifting that bit up into the top bit of a
11697 scratch register; we can then branch on the sign bit. */
11698 if (TARGET_THUMB1
11699 && GET_MODE (x) == SImode
11700 && (op == EQ || op == NE)
11701 && GET_CODE (x) == ZERO_EXTRACT
11702 && XEXP (x, 1) == const1_rtx)
11703 return CC_Nmode;
11704
11705 /* An operation that sets the condition codes as a side-effect; the
11706 V flag is not set correctly, so we can only use comparisons where
11707 this doesn't matter. (For LT and GE we can use "mi" and "pl"
11708 instead.) */
11709 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
11710 if (GET_MODE (x) == SImode
11711 && y == const0_rtx
11712 && (op == EQ || op == NE || op == LT || op == GE)
11713 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
11714 || GET_CODE (x) == AND || GET_CODE (x) == IOR
11715 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
11716 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
11717 || GET_CODE (x) == LSHIFTRT
11718 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11719 || GET_CODE (x) == ROTATERT
11720 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
11721 return CC_NOOVmode;
11722
11723 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
11724 return CC_Zmode;
11725
11726 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
11727 && GET_CODE (x) == PLUS
11728 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
11729 return CC_Cmode;
11730
11731 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
11732 {
11733 switch (op)
11734 {
11735 case EQ:
11736 case NE:
11737 /* A DImode comparison against zero can be implemented by
11738 or'ing the two halves together. */
11739 if (y == const0_rtx)
11740 return CC_Zmode;
11741
11742 /* We can do an equality test in three Thumb instructions. */
11743 if (!TARGET_32BIT)
11744 return CC_Zmode;
11745
11746 /* FALLTHROUGH */
11747
11748 case LTU:
11749 case LEU:
11750 case GTU:
11751 case GEU:
11752 /* DImode unsigned comparisons can be implemented by cmp +
11753 cmpeq without a scratch register. Not worth doing in
11754 Thumb-2. */
11755 if (TARGET_32BIT)
11756 return CC_CZmode;
11757
11758 /* FALLTHROUGH */
11759
11760 case LT:
11761 case LE:
11762 case GT:
11763 case GE:
11764 /* DImode signed and unsigned comparisons can be implemented
11765 by cmp + sbcs with a scratch register, but that does not
11766 set the Z flag - we must reverse GT/LE/GTU/LEU. */
11767 gcc_assert (op != EQ && op != NE);
11768 return CC_NCVmode;
11769
11770 default:
11771 gcc_unreachable ();
11772 }
11773 }
11774
11775 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
11776 return GET_MODE (x);
11777
11778 return CCmode;
11779 }
11780
11781 /* X and Y are two things to compare using CODE. Emit the compare insn and
11782 return the rtx for register 0 in the proper mode. SCRATCH, if non-NULL,
11783 is an SImode scratch register that may be needed for some DImode comparisons. */
11784 rtx
11785 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
11786 {
11787 enum machine_mode mode;
11788 rtx cc_reg;
11789 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
11790
11791 /* We might have X as a constant, Y as a register because of the predicates
11792 used for cmpdi. If so, force X to a register here. */
11793 if (dimode_comparison && !REG_P (x))
11794 x = force_reg (DImode, x);
11795
11796 mode = SELECT_CC_MODE (code, x, y);
11797 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
11798
11799 if (dimode_comparison
11800 && mode != CC_CZmode)
11801 {
11802 rtx clobber, set;
11803
11804 /* To compare two non-zero values for equality, XOR them and
11805 then compare against zero. Not used for ARM mode; there
11806 CC_CZmode is cheaper. */
11807 if (mode == CC_Zmode && y != const0_rtx)
11808 {
11809 gcc_assert (!reload_completed);
11810 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
11811 y = const0_rtx;
11812 }
11813
11814 /* A scratch register is required. */
11815 if (reload_completed)
11816 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
11817 else
11818 scratch = gen_rtx_SCRATCH (SImode);
11819
11820 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
11821 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
11822 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11823 }
11824 else
11825 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
11826
11827 return cc_reg;
11828 }
11829
11830 /* Generate a sequence of insns that will generate the correct return
11831 address mask depending on the physical architecture that the program
11832 is running on. */
11833 rtx
11834 arm_gen_return_addr_mask (void)
11835 {
11836 rtx reg = gen_reg_rtx (Pmode);
11837
11838 emit_insn (gen_return_addr_mask (reg));
11839 return reg;
11840 }
11841
11842 void
11843 arm_reload_in_hi (rtx *operands)
11844 {
11845 rtx ref = operands[1];
11846 rtx base, scratch;
11847 HOST_WIDE_INT offset = 0;
11848
11849 if (GET_CODE (ref) == SUBREG)
11850 {
11851 offset = SUBREG_BYTE (ref);
11852 ref = SUBREG_REG (ref);
11853 }
11854
11855 if (REG_P (ref))
11856 {
11857 /* We have a pseudo which has been spilt onto the stack; there
11858 are two cases here: the first where there is a simple
11859 stack-slot replacement and a second where the stack-slot is
11860 out of range, or is used as a subreg. */
11861 if (reg_equiv_mem (REGNO (ref)))
11862 {
11863 ref = reg_equiv_mem (REGNO (ref));
11864 base = find_replacement (&XEXP (ref, 0));
11865 }
11866 else
11867 /* The slot is out of range, or was dressed up in a SUBREG. */
11868 base = reg_equiv_address (REGNO (ref));
11869 }
11870 else
11871 base = find_replacement (&XEXP (ref, 0));
11872
11873 /* Handle the case where the address is too complex to be offset by 1. */
11874 if (GET_CODE (base) == MINUS
11875 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
11876 {
11877 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11878
11879 emit_set_insn (base_plus, base);
11880 base = base_plus;
11881 }
11882 else if (GET_CODE (base) == PLUS)
11883 {
11884 /* The addend must be CONST_INT, or we would have dealt with it above. */
11885 HOST_WIDE_INT hi, lo;
11886
11887 offset += INTVAL (XEXP (base, 1));
11888 base = XEXP (base, 0);
11889
11890 /* Rework the address into a legal sequence of insns. */
11891 /* Valid range for lo is -4095 -> 4095 */
11892 lo = (offset >= 0
11893 ? (offset & 0xfff)
11894 : -((-offset) & 0xfff));
11895
11896 /* Corner case: if lo is the max offset, then we would be out of range
11897 once we have added the additional 1 below, so bump the msb into the
11898 pre-loading insn(s). */
11899 if (lo == 4095)
11900 lo &= 0x7ff;
11901
11902 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11903 ^ (HOST_WIDE_INT) 0x80000000)
11904 - (HOST_WIDE_INT) 0x80000000);
11905
11906 gcc_assert (hi + lo == offset);
11907
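/* Illustrative example of the split above (not in the original sources):
   offset 0x1007 gives lo = 7 and hi = 0x1000, so the 0x1000 part is folded
   into BASE_PLUS by the addsi3 below and the two byte loads use offsets 7
   and 8, both well inside the +/-4095 range.  */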
11908 if (hi != 0)
11909 {
11910 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11911
11912 /* Get the base address; addsi3 knows how to handle constants
11913 that require more than one insn. */
11914 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11915 base = base_plus;
11916 offset = lo;
11917 }
11918 }
11919
11920 /* Operands[2] may overlap operands[0] (though it won't overlap
11921 operands[1]); that's why we asked for a DImode reg -- so we can
11922 use the half that does not overlap. */
11923 if (REGNO (operands[2]) == REGNO (operands[0]))
11924 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11925 else
11926 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11927
11928 emit_insn (gen_zero_extendqisi2 (scratch,
11929 gen_rtx_MEM (QImode,
11930 plus_constant (Pmode, base,
11931 offset))));
11932 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
11933 gen_rtx_MEM (QImode,
11934 plus_constant (Pmode, base,
11935 offset + 1))));
11936 if (!BYTES_BIG_ENDIAN)
11937 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11938 gen_rtx_IOR (SImode,
11939 gen_rtx_ASHIFT
11940 (SImode,
11941 gen_rtx_SUBREG (SImode, operands[0], 0),
11942 GEN_INT (8)),
11943 scratch));
11944 else
11945 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11946 gen_rtx_IOR (SImode,
11947 gen_rtx_ASHIFT (SImode, scratch,
11948 GEN_INT (8)),
11949 gen_rtx_SUBREG (SImode, operands[0], 0)));
11950 }
11951
11952 /* Handle storing a half-word to memory during reload by synthesizing it as two
11953 byte stores. Take care not to clobber the input values until after we
11954 have moved them somewhere safe. This code assumes that if the DImode
11955 scratch in operands[2] overlaps either the input value or output address
11956 in some way, then that value must die in this insn (we absolutely need
11957 two scratch registers for some corner cases). */
11958 void
11959 arm_reload_out_hi (rtx *operands)
11960 {
11961 rtx ref = operands[0];
11962 rtx outval = operands[1];
11963 rtx base, scratch;
11964 HOST_WIDE_INT offset = 0;
11965
11966 if (GET_CODE (ref) == SUBREG)
11967 {
11968 offset = SUBREG_BYTE (ref);
11969 ref = SUBREG_REG (ref);
11970 }
11971
11972 if (REG_P (ref))
11973 {
11974 /* We have a pseudo which has been spilt onto the stack; there
11975 are two cases here: the first where there is a simple
11976 stack-slot replacement and a second where the stack-slot is
11977 out of range, or is used as a subreg. */
11978 if (reg_equiv_mem (REGNO (ref)))
11979 {
11980 ref = reg_equiv_mem (REGNO (ref));
11981 base = find_replacement (&XEXP (ref, 0));
11982 }
11983 else
11984 /* The slot is out of range, or was dressed up in a SUBREG. */
11985 base = reg_equiv_address (REGNO (ref));
11986 }
11987 else
11988 base = find_replacement (&XEXP (ref, 0));
11989
11990 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11991
11992 /* Handle the case where the address is too complex to be offset by 1. */
11993 if (GET_CODE (base) == MINUS
11994 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
11995 {
11996 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11997
11998 /* Be careful not to destroy OUTVAL. */
11999 if (reg_overlap_mentioned_p (base_plus, outval))
12000 {
12001 /* Updating base_plus might destroy outval; see if we can
12002 swap the scratch and base_plus. */
12003 if (!reg_overlap_mentioned_p (scratch, outval))
12004 {
12005 rtx tmp = scratch;
12006 scratch = base_plus;
12007 base_plus = tmp;
12008 }
12009 else
12010 {
12011 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12012
12013 /* Be conservative and copy OUTVAL into the scratch now;
12014 this should only be necessary if outval is a subreg
12015 of something larger than a word. */
12016 /* XXX Might this clobber base? I can't see how it can,
12017 since scratch is known to overlap with OUTVAL, and
12018 must be wider than a word. */
12019 emit_insn (gen_movhi (scratch_hi, outval));
12020 outval = scratch_hi;
12021 }
12022 }
12023
12024 emit_set_insn (base_plus, base);
12025 base = base_plus;
12026 }
12027 else if (GET_CODE (base) == PLUS)
12028 {
12029 /* The addend must be CONST_INT, or we would have dealt with it above. */
12030 HOST_WIDE_INT hi, lo;
12031
12032 offset += INTVAL (XEXP (base, 1));
12033 base = XEXP (base, 0);
12034
12035 /* Rework the address into a legal sequence of insns. */
12036 /* Valid range for lo is -4095 -> 4095 */
12037 lo = (offset >= 0
12038 ? (offset & 0xfff)
12039 : -((-offset) & 0xfff));
12040
12041 /* Corner case: if lo is the max offset, then we would be out of range
12042 once we have added the additional 1 below, so bump the msb into the
12043 pre-loading insn(s). */
12044 if (lo == 4095)
12045 lo &= 0x7ff;
12046
12047 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
12048 ^ (HOST_WIDE_INT) 0x80000000)
12049 - (HOST_WIDE_INT) 0x80000000);
12050
12051 gcc_assert (hi + lo == offset);
12052
12053 if (hi != 0)
12054 {
12055 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12056
12057 /* Be careful not to destroy OUTVAL. */
12058 if (reg_overlap_mentioned_p (base_plus, outval))
12059 {
12060 /* Updating base_plus might destroy outval; see if we
12061 can swap the scratch and base_plus. */
12062 if (!reg_overlap_mentioned_p (scratch, outval))
12063 {
12064 rtx tmp = scratch;
12065 scratch = base_plus;
12066 base_plus = tmp;
12067 }
12068 else
12069 {
12070 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12071
12072 /* Be conservative and copy outval into scratch now;
12073 this should only be necessary if outval is a
12074 subreg of something larger than a word. */
12075 /* XXX Might this clobber base? I can't see how it
12076 can, since scratch is known to overlap with
12077 outval. */
12078 emit_insn (gen_movhi (scratch_hi, outval));
12079 outval = scratch_hi;
12080 }
12081 }
12082
12083 /* Get the base address; addsi3 knows how to handle constants
12084 that require more than one insn. */
12085 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
12086 base = base_plus;
12087 offset = lo;
12088 }
12089 }
12090
12091 if (BYTES_BIG_ENDIAN)
12092 {
12093 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12094 plus_constant (Pmode, base,
12095 offset + 1)),
12096 gen_lowpart (QImode, outval)));
12097 emit_insn (gen_lshrsi3 (scratch,
12098 gen_rtx_SUBREG (SImode, outval, 0),
12099 GEN_INT (8)));
12100 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12101 offset)),
12102 gen_lowpart (QImode, scratch)));
12103 }
12104 else
12105 {
12106 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12107 offset)),
12108 gen_lowpart (QImode, outval)));
12109 emit_insn (gen_lshrsi3 (scratch,
12110 gen_rtx_SUBREG (SImode, outval, 0),
12111 GEN_INT (8)));
12112 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12113 plus_constant (Pmode, base,
12114 offset + 1)),
12115 gen_lowpart (QImode, scratch)));
12116 }
12117 }
12118
12119 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
12120 (padded to the size of a word) should be passed in a register. */
12121
12122 static bool
12123 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
12124 {
12125 if (TARGET_AAPCS_BASED)
12126 return must_pass_in_stack_var_size (mode, type);
12127 else
12128 return must_pass_in_stack_var_size_or_pad (mode, type);
12129 }
12130
12131
12132 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
12133 Return true if an argument passed on the stack should be padded upwards,
12134 i.e. if the least-significant byte has useful data.
12135 For legacy APCS ABIs we use the default. For AAPCS-based ABIs small
12136 aggregate types are placed at the lowest memory address. */
12137
12138 bool
12139 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
12140 {
12141 if (!TARGET_AAPCS_BASED)
12142 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
12143
12144 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
12145 return false;
12146
12147 return true;
12148 }
12149
12150
12151 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
12152 Return !BYTES_BIG_ENDIAN if the least significant byte of the
12153 register has useful data, and return the opposite if the most
12154 significant byte does. */
12155
12156 bool
12157 arm_pad_reg_upward (enum machine_mode mode,
12158 tree type, int first ATTRIBUTE_UNUSED)
12159 {
12160 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
12161 {
12162 /* For AAPCS, small aggregates, small fixed-point types,
12163 and small complex types are always padded upwards. */
12164 if (type)
12165 {
12166 if ((AGGREGATE_TYPE_P (type)
12167 || TREE_CODE (type) == COMPLEX_TYPE
12168 || FIXED_POINT_TYPE_P (type))
12169 && int_size_in_bytes (type) <= 4)
12170 return true;
12171 }
12172 else
12173 {
12174 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
12175 && GET_MODE_SIZE (mode) <= 4)
12176 return true;
12177 }
12178 }
12179
12180 /* Otherwise, use default padding. */
12181 return !BYTES_BIG_ENDIAN;
12182 }
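/* Illustration of the rule above: on a big-endian AAPCS target, a 3-byte
   aggregate (int_size_in_bytes <= 4) is padded upward, whereas a plain int
   falls through to the default of !BYTES_BIG_ENDIAN, i.e. downward padding
   on big-endian.  */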
12183
12184 \f
12185 /* Print a symbolic form of X to the debug file, F. */
12186 static void
12187 arm_print_value (FILE *f, rtx x)
12188 {
12189 switch (GET_CODE (x))
12190 {
12191 case CONST_INT:
12192 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
12193 return;
12194
12195 case CONST_DOUBLE:
12196 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
12197 return;
12198
12199 case CONST_VECTOR:
12200 {
12201 int i;
12202
12203 fprintf (f, "<");
12204 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
12205 {
12206 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
12207 if (i < (CONST_VECTOR_NUNITS (x) - 1))
12208 fputc (',', f);
12209 }
12210 fprintf (f, ">");
12211 }
12212 return;
12213
12214 case CONST_STRING:
12215 fprintf (f, "\"%s\"", XSTR (x, 0));
12216 return;
12217
12218 case SYMBOL_REF:
12219 fprintf (f, "`%s'", XSTR (x, 0));
12220 return;
12221
12222 case LABEL_REF:
12223 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
12224 return;
12225
12226 case CONST:
12227 arm_print_value (f, XEXP (x, 0));
12228 return;
12229
12230 case PLUS:
12231 arm_print_value (f, XEXP (x, 0));
12232 fprintf (f, "+");
12233 arm_print_value (f, XEXP (x, 1));
12234 return;
12235
12236 case PC:
12237 fprintf (f, "pc");
12238 return;
12239
12240 default:
12241 fprintf (f, "????");
12242 return;
12243 }
12244 }
12245 \f
12246 /* Routines for manipulation of the constant pool. */
12247
12248 /* Arm instructions cannot load a large constant directly into a
12249 register; they have to come from a pc relative load. The constant
12250 must therefore be placed in the addressable range of the pc
12251 relative load. Depending on the precise pc relative load
12252 instruction the range is somewhere between 256 bytes and 4k. This
12253 means that we often have to dump a constant inside a function, and
12254 generate code to branch around it.
12255
12256 It is important to minimize this, since the branches will slow
12257 things down and make the code larger.
12258
12259 Normally we can hide the table after an existing unconditional
12260 branch so that there is no interruption of the flow, but in the
12261 worst case the code looks like this:
12262
12263 ldr rn, L1
12264 ...
12265 b L2
12266 align
12267 L1: .long value
12268 L2:
12269 ...
12270
12271 ldr rn, L3
12272 ...
12273 b L4
12274 align
12275 L3: .long value
12276 L4:
12277 ...
12278
12279 We fix this by performing a scan after scheduling, which notices
12280 which instructions need to have their operands fetched from the
12281 constant table and builds the table.
12282
12283 The algorithm starts by building a table of all the constants that
12284 need fixing up and all the natural barriers in the function (places
12285 where a constant table can be dropped without breaking the flow).
12286 For each fixup we note how far the pc-relative replacement will be
12287 able to reach and the offset of the instruction into the function.
12288
12289 Having built the table we then group the fixes together to form
12290 tables that are as large as possible (subject to addressing
12291 constraints) and emit each table of constants after the last
12292 barrier that is within range of all the instructions in the group.
12293 If a group does not contain a barrier, then we forcibly create one
12294 by inserting a jump instruction into the flow. Once the table has
12295 been inserted, the insns are then modified to reference the
12296 relevant entry in the pool.
12297
12298 Possible enhancements to the algorithm (not implemented) are:
12299
12300 1) For some processors and object formats, there may be benefit in
12301 aligning the pools to the start of cache lines; this alignment
12302 would need to be taken into account when calculating addressability
12303 of a pool. */
12304
12305 /* These typedefs are located at the start of this file, so that
12306 they can be used in the prototypes there. This comment is to
12307 remind readers of that fact so that the following structures
12308 can be understood more easily.
12309
12310 typedef struct minipool_node Mnode;
12311 typedef struct minipool_fixup Mfix; */
12312
12313 struct minipool_node
12314 {
12315 /* Doubly linked chain of entries. */
12316 Mnode * next;
12317 Mnode * prev;
12318 /* The maximum offset into the code at which this entry can be placed. While
12319 pushing fixes for forward references, all entries are sorted in order
12320 of increasing max_address. */
12321 HOST_WIDE_INT max_address;
12322 /* Similarly for an entry inserted for a backwards ref. */
12323 HOST_WIDE_INT min_address;
12324 /* The number of fixes referencing this entry. This can become zero
12325 if we "unpush" an entry. In this case we ignore the entry when we
12326 come to emit the code. */
12327 int refcount;
12328 /* The offset from the start of the minipool. */
12329 HOST_WIDE_INT offset;
12330 /* The value in the table. */
12331 rtx value;
12332 /* The mode of value. */
12333 enum machine_mode mode;
12334 /* The size of the value. With iWMMXt enabled
12335 sizes > 4 also imply an alignment of 8 bytes. */
12336 int fix_size;
12337 };
12338
12339 struct minipool_fixup
12340 {
12341 Mfix * next;
12342 rtx insn;
12343 HOST_WIDE_INT address;
12344 rtx * loc;
12345 enum machine_mode mode;
12346 int fix_size;
12347 rtx value;
12348 Mnode * minipool;
12349 HOST_WIDE_INT forwards;
12350 HOST_WIDE_INT backwards;
12351 };
12352
12353 /* Fixes less than a word need padding out to a word boundary. */
12354 #define MINIPOOL_FIX_SIZE(mode) \
12355 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
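/* So, for example, MINIPOOL_FIX_SIZE (HImode) and MINIPOOL_FIX_SIZE (SImode)
   are both 4, while MINIPOOL_FIX_SIZE (DImode) is 8 (illustrative values,
   assuming the usual 2/4/8-byte mode sizes).  */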
12356
12357 static Mnode * minipool_vector_head;
12358 static Mnode * minipool_vector_tail;
12359 static rtx minipool_vector_label;
12360 static int minipool_pad;
12361
12362 /* The linked list of all minipool fixes required for this function. */
12363 Mfix * minipool_fix_head;
12364 Mfix * minipool_fix_tail;
12365 /* The fix entry for the current minipool, once it has been placed. */
12366 Mfix * minipool_barrier;
12367
12368 /* Determines if INSN is the start of a jump table. Returns the end
12369 of the TABLE or NULL_RTX. */
12370 static rtx
12371 is_jump_table (rtx insn)
12372 {
12373 rtx table;
12374
12375 if (jump_to_label_p (insn)
12376 && ((table = next_real_insn (JUMP_LABEL (insn)))
12377 == next_real_insn (insn))
12378 && table != NULL
12379 && JUMP_P (table)
12380 && (GET_CODE (PATTERN (table)) == ADDR_VEC
12381 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
12382 return table;
12383
12384 return NULL_RTX;
12385 }
12386
12387 #ifndef JUMP_TABLES_IN_TEXT_SECTION
12388 #define JUMP_TABLES_IN_TEXT_SECTION 0
12389 #endif
12390
12391 static HOST_WIDE_INT
12392 get_jump_table_size (rtx insn)
12393 {
12394 /* ADDR_VECs only take room if read-only data goes into the text
12395 section. */
12396 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
12397 {
12398 rtx body = PATTERN (insn);
12399 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
12400 HOST_WIDE_INT size;
12401 HOST_WIDE_INT modesize;
12402
12403 modesize = GET_MODE_SIZE (GET_MODE (body));
12404 size = modesize * XVECLEN (body, elt);
12405 switch (modesize)
12406 {
12407 case 1:
12408 /* Round up size of TBB table to a halfword boundary. */
12409 size = (size + 1) & ~(HOST_WIDE_INT)1;
12410 break;
12411 case 2:
12412 /* No padding necessary for TBH. */
12413 break;
12414 case 4:
12415 /* Add two bytes for alignment on Thumb. */
12416 if (TARGET_THUMB)
12417 size += 2;
12418 break;
12419 default:
12420 gcc_unreachable ();
12421 }
12422 return size;
12423 }
12424
12425 return 0;
12426 }
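/* Worked example (illustrative): a TBB-style ADDR_DIFF_VEC in QImode with
   five entries gives modesize 1 and size 5, rounded up to 6 bytes by the
   halfword alignment above; the same table in HImode (TBH) would be 10 bytes
   with no extra padding.  */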
12427
12428 /* Return the maximum amount of padding that will be inserted before
12429 label LABEL. */
12430
12431 static HOST_WIDE_INT
12432 get_label_padding (rtx label)
12433 {
12434 HOST_WIDE_INT align, min_insn_size;
12435
12436 align = 1 << label_to_alignment (label);
12437 min_insn_size = TARGET_THUMB ? 2 : 4;
12438 return align > min_insn_size ? align - min_insn_size : 0;
12439 }
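/* E.g. (illustrative): if LABEL is aligned to 8 bytes (label_to_alignment
   returns 3) on a Thumb target, the maximum padding returned is 8 - 2 = 6
   bytes, i.e. ALIGN minus the minimum Thumb instruction size.  */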
12440
12441 /* Move a minipool fix MP from its current location to before MAX_MP.
12442 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
12443 constraints may need updating. */
12444 static Mnode *
12445 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
12446 HOST_WIDE_INT max_address)
12447 {
12448 /* The code below assumes these are different. */
12449 gcc_assert (mp != max_mp);
12450
12451 if (max_mp == NULL)
12452 {
12453 if (max_address < mp->max_address)
12454 mp->max_address = max_address;
12455 }
12456 else
12457 {
12458 if (max_address > max_mp->max_address - mp->fix_size)
12459 mp->max_address = max_mp->max_address - mp->fix_size;
12460 else
12461 mp->max_address = max_address;
12462
12463 /* Unlink MP from its current position. Since max_mp is non-null,
12464 mp->prev must be non-null. */
12465 mp->prev->next = mp->next;
12466 if (mp->next != NULL)
12467 mp->next->prev = mp->prev;
12468 else
12469 minipool_vector_tail = mp->prev;
12470
12471 /* Re-insert it before MAX_MP. */
12472 mp->next = max_mp;
12473 mp->prev = max_mp->prev;
12474 max_mp->prev = mp;
12475
12476 if (mp->prev != NULL)
12477 mp->prev->next = mp;
12478 else
12479 minipool_vector_head = mp;
12480 }
12481
12482 /* Save the new entry. */
12483 max_mp = mp;
12484
12485 /* Scan over the preceding entries and adjust their addresses as
12486 required. */
12487 while (mp->prev != NULL
12488 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12489 {
12490 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12491 mp = mp->prev;
12492 }
12493
12494 return max_mp;
12495 }
12496
12497 /* Add a constant to the minipool for a forward reference. Returns the
12498 node added or NULL if the constant will not fit in this pool. */
12499 static Mnode *
12500 add_minipool_forward_ref (Mfix *fix)
12501 {
12502 /* If set, max_mp is the first pool_entry that has a lower
12503 constraint than the one we are trying to add. */
12504 Mnode * max_mp = NULL;
12505 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
12506 Mnode * mp;
12507
12508 /* If the minipool starts before the end of FIX->INSN then this FIX
12509 cannot be placed into the current pool. Furthermore, adding the
12510 new constant pool entry may cause the pool to start FIX_SIZE bytes
12511 earlier. */
12512 if (minipool_vector_head
12513 && (fix->address + get_attr_length (fix->insn)
12514 >= minipool_vector_head->max_address - fix->fix_size))
12515 return NULL;
12516
12517 /* Scan the pool to see if a constant with the same value has
12518 already been added. While we are doing this, also note the
12519 location where we must insert the constant if it doesn't already
12520 exist. */
12521 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12522 {
12523 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12524 && fix->mode == mp->mode
12525 && (!LABEL_P (fix->value)
12526 || (CODE_LABEL_NUMBER (fix->value)
12527 == CODE_LABEL_NUMBER (mp->value)))
12528 && rtx_equal_p (fix->value, mp->value))
12529 {
12530 /* More than one fix references this entry. */
12531 mp->refcount++;
12532 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
12533 }
12534
12535 /* Note the insertion point if necessary. */
12536 if (max_mp == NULL
12537 && mp->max_address > max_address)
12538 max_mp = mp;
12539
12540 /* If we are inserting an 8-byte aligned quantity and
12541 we have not already found an insertion point, then
12542 make sure that all such 8-byte aligned quantities are
12543 placed at the start of the pool. */
12544 if (ARM_DOUBLEWORD_ALIGN
12545 && max_mp == NULL
12546 && fix->fix_size >= 8
12547 && mp->fix_size < 8)
12548 {
12549 max_mp = mp;
12550 max_address = mp->max_address;
12551 }
12552 }
12553
12554 /* The value is not currently in the minipool, so we need to create
12555 a new entry for it. If MAX_MP is NULL, the entry will be put on
12556 the end of the list since the placement is less constrained than
12557 any existing entry. Otherwise, we insert the new fix before
12558 MAX_MP and, if necessary, adjust the constraints on the other
12559 entries. */
12560 mp = XNEW (Mnode);
12561 mp->fix_size = fix->fix_size;
12562 mp->mode = fix->mode;
12563 mp->value = fix->value;
12564 mp->refcount = 1;
12565 /* Not yet required for a backwards ref. */
12566 mp->min_address = -65536;
12567
12568 if (max_mp == NULL)
12569 {
12570 mp->max_address = max_address;
12571 mp->next = NULL;
12572 mp->prev = minipool_vector_tail;
12573
12574 if (mp->prev == NULL)
12575 {
12576 minipool_vector_head = mp;
12577 minipool_vector_label = gen_label_rtx ();
12578 }
12579 else
12580 mp->prev->next = mp;
12581
12582 minipool_vector_tail = mp;
12583 }
12584 else
12585 {
12586 if (max_address > max_mp->max_address - mp->fix_size)
12587 mp->max_address = max_mp->max_address - mp->fix_size;
12588 else
12589 mp->max_address = max_address;
12590
12591 mp->next = max_mp;
12592 mp->prev = max_mp->prev;
12593 max_mp->prev = mp;
12594 if (mp->prev != NULL)
12595 mp->prev->next = mp;
12596 else
12597 minipool_vector_head = mp;
12598 }
12599
12600 /* Save the new entry. */
12601 max_mp = mp;
12602
12603 /* Scan over the preceding entries and adjust their addresses as
12604 required. */
12605 while (mp->prev != NULL
12606 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12607 {
12608 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12609 mp = mp->prev;
12610 }
12611
12612 return max_mp;
12613 }
12614
12615 static Mnode *
12616 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
12617 HOST_WIDE_INT min_address)
12618 {
12619 HOST_WIDE_INT offset;
12620
12621 /* The code below assumes these are different. */
12622 gcc_assert (mp != min_mp);
12623
12624 if (min_mp == NULL)
12625 {
12626 if (min_address > mp->min_address)
12627 mp->min_address = min_address;
12628 }
12629 else
12630 {
12631 /* We will adjust this below if it is too loose. */
12632 mp->min_address = min_address;
12633
12634 /* Unlink MP from its current position. Since min_mp is non-null,
12635 mp->next must be non-null. */
12636 mp->next->prev = mp->prev;
12637 if (mp->prev != NULL)
12638 mp->prev->next = mp->next;
12639 else
12640 minipool_vector_head = mp->next;
12641
12642 /* Reinsert it after MIN_MP. */
12643 mp->prev = min_mp;
12644 mp->next = min_mp->next;
12645 min_mp->next = mp;
12646 if (mp->next != NULL)
12647 mp->next->prev = mp;
12648 else
12649 minipool_vector_tail = mp;
12650 }
12651
12652 min_mp = mp;
12653
12654 offset = 0;
12655 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12656 {
12657 mp->offset = offset;
12658 if (mp->refcount > 0)
12659 offset += mp->fix_size;
12660
12661 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
12662 mp->next->min_address = mp->min_address + mp->fix_size;
12663 }
12664
12665 return min_mp;
12666 }
12667
12668 /* Add a constant to the minipool for a backward reference. Returns the
12669 node added or NULL if the constant will not fit in this pool.
12670
12671 Note that the insertion code for a backwards reference can be
12672 somewhat confusing because the calculated offsets for each fix do
12673 not take into account the size of the pool (which is still under
12674 construction).  */
12675 static Mnode *
12676 add_minipool_backward_ref (Mfix *fix)
12677 {
12678 /* If set, min_mp is the last pool_entry that has a lower constraint
12679 than the one we are trying to add. */
12680 Mnode *min_mp = NULL;
12681 /* This can be negative, since it is only a constraint. */
12682 HOST_WIDE_INT min_address = fix->address - fix->backwards;
12683 Mnode *mp;
12684
12685 /* If we can't reach the current pool from this insn, or if we can't
12686 insert this entry at the end of the pool without pushing other
12687 fixes out of range, then we don't try. This ensures that we
12688 can't fail later on. */
12689 if (min_address >= minipool_barrier->address
12690 || (minipool_vector_tail->min_address + fix->fix_size
12691 >= minipool_barrier->address))
12692 return NULL;
12693
12694 /* Scan the pool to see if a constant with the same value has
12695 already been added. While we are doing this, also note the
12696 location where we must insert the constant if it doesn't already
12697 exist. */
12698 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
12699 {
12700 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12701 && fix->mode == mp->mode
12702 && (!LABEL_P (fix->value)
12703 || (CODE_LABEL_NUMBER (fix->value)
12704 == CODE_LABEL_NUMBER (mp->value)))
12705 && rtx_equal_p (fix->value, mp->value)
12706 /* Check that there is enough slack to move this entry to the
12707 end of the table (this is conservative). */
12708 && (mp->max_address
12709 > (minipool_barrier->address
12710 + minipool_vector_tail->offset
12711 + minipool_vector_tail->fix_size)))
12712 {
12713 mp->refcount++;
12714 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
12715 }
12716
12717 if (min_mp != NULL)
12718 mp->min_address += fix->fix_size;
12719 else
12720 {
12721 /* Note the insertion point if necessary. */
12722 if (mp->min_address < min_address)
12723 {
12724 /* For now, we do not allow nodes requiring 8-byte alignment
12725 to be inserted anywhere but at the start of the pool. */
12726 if (ARM_DOUBLEWORD_ALIGN
12727 && fix->fix_size >= 8 && mp->fix_size < 8)
12728 return NULL;
12729 else
12730 min_mp = mp;
12731 }
12732 else if (mp->max_address
12733 < minipool_barrier->address + mp->offset + fix->fix_size)
12734 {
12735 /* Inserting before this entry would push the fix beyond
12736 its maximum address (which can happen if we have
12737 re-located a forwards fix); force the new fix to come
12738 after it. */
12739 if (ARM_DOUBLEWORD_ALIGN
12740 && fix->fix_size >= 8 && mp->fix_size < 8)
12741 return NULL;
12742 else
12743 {
12744 min_mp = mp;
12745 min_address = mp->min_address + fix->fix_size;
12746 }
12747 }
12748 /* Do not insert a non-8-byte aligned quantity before 8-byte
12749 aligned quantities. */
12750 else if (ARM_DOUBLEWORD_ALIGN
12751 && fix->fix_size < 8
12752 && mp->fix_size >= 8)
12753 {
12754 min_mp = mp;
12755 min_address = mp->min_address + fix->fix_size;
12756 }
12757 }
12758 }
12759
12760 /* We need to create a new entry. */
12761 mp = XNEW (Mnode);
12762 mp->fix_size = fix->fix_size;
12763 mp->mode = fix->mode;
12764 mp->value = fix->value;
12765 mp->refcount = 1;
12766 mp->max_address = minipool_barrier->address + 65536;
12767
12768 mp->min_address = min_address;
12769
12770 if (min_mp == NULL)
12771 {
12772 mp->prev = NULL;
12773 mp->next = minipool_vector_head;
12774
12775 if (mp->next == NULL)
12776 {
12777 minipool_vector_tail = mp;
12778 minipool_vector_label = gen_label_rtx ();
12779 }
12780 else
12781 mp->next->prev = mp;
12782
12783 minipool_vector_head = mp;
12784 }
12785 else
12786 {
12787 mp->next = min_mp->next;
12788 mp->prev = min_mp;
12789 min_mp->next = mp;
12790
12791 if (mp->next != NULL)
12792 mp->next->prev = mp;
12793 else
12794 minipool_vector_tail = mp;
12795 }
12796
12797 /* Save the new entry. */
12798 min_mp = mp;
12799
12800 if (mp->prev)
12801 mp = mp->prev;
12802 else
12803 mp->offset = 0;
12804
12805 /* Scan over the following entries and adjust their offsets. */
12806 while (mp->next != NULL)
12807 {
12808 if (mp->next->min_address < mp->min_address + mp->fix_size)
12809 mp->next->min_address = mp->min_address + mp->fix_size;
12810
12811 if (mp->refcount)
12812 mp->next->offset = mp->offset + mp->fix_size;
12813 else
12814 mp->next->offset = mp->offset;
12815
12816 mp = mp->next;
12817 }
12818
12819 return min_mp;
12820 }
12821
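/* Record BARRIER as the place where the current minipool will be
   dumped and fill in the offset of each entry.  Entries whose
   refcount has dropped to zero take no space.  */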
12822 static void
12823 assign_minipool_offsets (Mfix *barrier)
12824 {
12825 HOST_WIDE_INT offset = 0;
12826 Mnode *mp;
12827
12828 minipool_barrier = barrier;
12829
12830 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12831 {
12832 mp->offset = offset;
12833
12834 if (mp->refcount > 0)
12835 offset += mp->fix_size;
12836 }
12837 }
12838
12839 /* Output the literal table.  */
12840 static void
12841 dump_minipool (rtx scan)
12842 {
12843 Mnode * mp;
12844 Mnode * nmp;
12845 int align64 = 0;
12846
12847 if (ARM_DOUBLEWORD_ALIGN)
12848 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12849 if (mp->refcount > 0 && mp->fix_size >= 8)
12850 {
12851 align64 = 1;
12852 break;
12853 }
12854
12855 if (dump_file)
12856 fprintf (dump_file,
12857 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
12858 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
12859
12860 scan = emit_label_after (gen_label_rtx (), scan);
12861 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
12862 scan = emit_label_after (minipool_vector_label, scan);
12863
12864 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
12865 {
12866 if (mp->refcount > 0)
12867 {
12868 if (dump_file)
12869 {
12870 fprintf (dump_file,
12871 ";; Offset %u, min %ld, max %ld ",
12872 (unsigned) mp->offset, (unsigned long) mp->min_address,
12873 (unsigned long) mp->max_address);
12874 arm_print_value (dump_file, mp->value);
12875 fputc ('\n', dump_file);
12876 }
12877
12878 switch (mp->fix_size)
12879 {
12880 #ifdef HAVE_consttable_1
12881 case 1:
12882 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
12883 break;
12884
12885 #endif
12886 #ifdef HAVE_consttable_2
12887 case 2:
12888 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
12889 break;
12890
12891 #endif
12892 #ifdef HAVE_consttable_4
12893 case 4:
12894 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
12895 break;
12896
12897 #endif
12898 #ifdef HAVE_consttable_8
12899 case 8:
12900 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
12901 break;
12902
12903 #endif
12904 #ifdef HAVE_consttable_16
12905 case 16:
12906 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
12907 break;
12908
12909 #endif
12910 default:
12911 gcc_unreachable ();
12912 }
12913 }
12914
12915 nmp = mp->next;
12916 free (mp);
12917 }
12918
12919 minipool_vector_head = minipool_vector_tail = NULL;
12920 scan = emit_insn_after (gen_consttable_end (), scan);
12921 scan = emit_barrier_after (scan);
12922 }
12923
12924 /* Return the cost of forcibly inserting a barrier after INSN. */
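/* A lower cost marks a better location: the cost is reduced when the
   next insn is a label (the pool then sits just before that label)
   and when INSN is a jump; create_fix_barrier picks the reachable
   insn with the lowest cost.  */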
12925 static int
12926 arm_barrier_cost (rtx insn)
12927 {
12928 /* Basing the location of the pool on the loop depth is preferable,
12929 but at the moment, the basic block information seems to be
12930 corrupt by this stage of the compilation. */
12931 int base_cost = 50;
12932 rtx next = next_nonnote_insn (insn);
12933
12934 if (next != NULL && LABEL_P (next))
12935 base_cost -= 20;
12936
12937 switch (GET_CODE (insn))
12938 {
12939 case CODE_LABEL:
12940 /* It will always be better to place the table before the label, rather
12941 than after it. */
12942 return 50;
12943
12944 case INSN:
12945 case CALL_INSN:
12946 return base_cost;
12947
12948 case JUMP_INSN:
12949 return base_cost - 10;
12950
12951 default:
12952 return base_cost + 10;
12953 }
12954 }
12955
12956 /* Find the best place in the insn stream in the range
12957 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
12958 Create the barrier by inserting a jump and add a new fix entry for
12959 it. */
12960 static Mfix *
12961 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
12962 {
12963 HOST_WIDE_INT count = 0;
12964 rtx barrier;
12965 rtx from = fix->insn;
12966 /* The instruction after which we will insert the jump. */
12967 rtx selected = NULL;
12968 int selected_cost;
12969 /* The address at which the jump instruction will be placed. */
12970 HOST_WIDE_INT selected_address;
12971 Mfix * new_fix;
12972 HOST_WIDE_INT max_count = max_address - fix->address;
12973 rtx label = gen_label_rtx ();
12974
12975 selected_cost = arm_barrier_cost (from);
12976 selected_address = fix->address;
12977
12978 while (from && count < max_count)
12979 {
12980 rtx tmp;
12981 int new_cost;
12982
12983 /* This code shouldn't have been called if there was a natural barrier
12984 within range. */
12985 gcc_assert (!BARRIER_P (from));
12986
12987 /* Count the length of this insn. This must stay in sync with the
12988 code that pushes minipool fixes. */
12989 if (LABEL_P (from))
12990 count += get_label_padding (from);
12991 else
12992 count += get_attr_length (from);
12993
12994 /* If there is a jump table, add its length. */
12995 tmp = is_jump_table (from);
12996 if (tmp != NULL)
12997 {
12998 count += get_jump_table_size (tmp);
12999
13000 /* Jump tables aren't in a basic block, so base the cost on
13001 the dispatch insn. If we select this location, we will
13002 still put the pool after the table. */
13003 new_cost = arm_barrier_cost (from);
13004
13005 if (count < max_count
13006 && (!selected || new_cost <= selected_cost))
13007 {
13008 selected = tmp;
13009 selected_cost = new_cost;
13010 selected_address = fix->address + count;
13011 }
13012
13013 /* Continue after the dispatch table. */
13014 from = NEXT_INSN (tmp);
13015 continue;
13016 }
13017
13018 new_cost = arm_barrier_cost (from);
13019
13020 if (count < max_count
13021 && (!selected || new_cost <= selected_cost))
13022 {
13023 selected = from;
13024 selected_cost = new_cost;
13025 selected_address = fix->address + count;
13026 }
13027
13028 from = NEXT_INSN (from);
13029 }
13030
13031 /* Make sure that we found a place to insert the jump. */
13032 gcc_assert (selected);
13033
13034 /* Make sure we do not split a call and its corresponding
13035 CALL_ARG_LOCATION note. */
13036 if (CALL_P (selected))
13037 {
13038 rtx next = NEXT_INSN (selected);
13039 if (next && NOTE_P (next)
13040 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
13041 selected = next;
13042 }
13043
13044 /* Create a new JUMP_INSN that branches around a barrier. */
13045 from = emit_jump_insn_after (gen_jump (label), selected);
13046 JUMP_LABEL (from) = label;
13047 barrier = emit_barrier_after (from);
13048 emit_label_after (label, barrier);
13049
13050 /* Create a minipool barrier entry for the new barrier. */
13051 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
13052 new_fix->insn = barrier;
13053 new_fix->address = selected_address;
13054 new_fix->next = fix->next;
13055 fix->next = new_fix;
13056
13057 return new_fix;
13058 }
13059
13060 /* Record that there is a natural barrier in the insn stream at
13061 ADDRESS. */
13062 static void
13063 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
13064 {
13065 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13066
13067 fix->insn = insn;
13068 fix->address = address;
13069
13070 fix->next = NULL;
13071 if (minipool_fix_head != NULL)
13072 minipool_fix_tail->next = fix;
13073 else
13074 minipool_fix_head = fix;
13075
13076 minipool_fix_tail = fix;
13077 }
13078
13079 /* Record INSN, which will need fixing up to load a value from the
13080 minipool. ADDRESS is the offset of the insn since the start of the
13081 function; LOC is a pointer to the part of the insn which requires
13082 fixing; VALUE is the constant that must be loaded, which is of type
13083 MODE. */
13084 static void
13085 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
13086 enum machine_mode mode, rtx value)
13087 {
13088 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13089
13090 fix->insn = insn;
13091 fix->address = address;
13092 fix->loc = loc;
13093 fix->mode = mode;
13094 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
13095 fix->value = value;
13096 fix->forwards = get_attr_pool_range (insn);
13097 fix->backwards = get_attr_neg_pool_range (insn);
13098 fix->minipool = NULL;
13099
13100 /* If an insn doesn't have a range defined for it, then it isn't
13101 expecting to be reworked by this code. Better to stop now than
13102 to generate duff assembly code. */
13103 gcc_assert (fix->forwards || fix->backwards);
13104
13105 /* If an entry requires 8-byte alignment then assume all constant pools
13106 require 4 bytes of padding. Trying to do this later on a per-pool
13107 basis is awkward because existing pool entries have to be modified. */
13108 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
13109 minipool_pad = 4;
13110
13111 if (dump_file)
13112 {
13113 fprintf (dump_file,
13114 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
13115 GET_MODE_NAME (mode),
13116 INSN_UID (insn), (unsigned long) address,
13117 -1 * (long)fix->backwards, (long)fix->forwards);
13118 arm_print_value (dump_file, fix->value);
13119 fprintf (dump_file, "\n");
13120 }
13121
13122 /* Add it to the chain of fixes. */
13123 fix->next = NULL;
13124
13125 if (minipool_fix_head != NULL)
13126 minipool_fix_tail->next = fix;
13127 else
13128 minipool_fix_head = fix;
13129
13130 minipool_fix_tail = fix;
13131 }
13132
13133 /* Return the cost of synthesizing a 64-bit constant VAL inline.
13134 Returns the number of insns needed, or 99 if we don't know how to
13135 do it. */
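/* As a rough example (assuming the usual ARM immediate rules), the
   value 0x0000000100000001 costs 2 here, since each 32-bit half is a
   single MOV; halves that need several ORR/ADD steps push the cost
   higher.  */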
13136 int
13137 arm_const_double_inline_cost (rtx val)
13138 {
13139 rtx lowpart, highpart;
13140 enum machine_mode mode;
13141
13142 mode = GET_MODE (val);
13143
13144 if (mode == VOIDmode)
13145 mode = DImode;
13146
13147 gcc_assert (GET_MODE_SIZE (mode) == 8);
13148
13149 lowpart = gen_lowpart (SImode, val);
13150 highpart = gen_highpart_mode (SImode, mode, val);
13151
13152 gcc_assert (CONST_INT_P (lowpart));
13153 gcc_assert (CONST_INT_P (highpart));
13154
13155 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
13156 NULL_RTX, NULL_RTX, 0, 0)
13157 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
13158 NULL_RTX, NULL_RTX, 0, 0));
13159 }
13160
13161 /* Return true if it is worthwhile to split a 64-bit constant into two
13162 32-bit operations. This is the case if optimizing for size, or
13163 if we have load delay slots, or if one 32-bit part can be done with
13164 a single data operation. */
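/* For instance, a constant whose high word is 0xff000000 can have
   that half done with one MOV (0xff000000 is a valid rotated
   immediate), so splitting into two SImode sets is considered
   worthwhile.  */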
13165 bool
13166 arm_const_double_by_parts (rtx val)
13167 {
13168 enum machine_mode mode = GET_MODE (val);
13169 rtx part;
13170
13171 if (optimize_size || arm_ld_sched)
13172 return true;
13173
13174 if (mode == VOIDmode)
13175 mode = DImode;
13176
13177 part = gen_highpart_mode (SImode, mode, val);
13178
13179 gcc_assert (CONST_INT_P (part));
13180
13181 if (const_ok_for_arm (INTVAL (part))
13182 || const_ok_for_arm (~INTVAL (part)))
13183 return true;
13184
13185 part = gen_lowpart (SImode, val);
13186
13187 gcc_assert (CONST_INT_P (part));
13188
13189 if (const_ok_for_arm (INTVAL (part))
13190 || const_ok_for_arm (~INTVAL (part)))
13191 return true;
13192
13193 return false;
13194 }
13195
13196 /* Return true if it is possible to inline both the high and low parts
13197 of a 64-bit constant into 32-bit data processing instructions. */
13198 bool
13199 arm_const_double_by_immediates (rtx val)
13200 {
13201 enum machine_mode mode = GET_MODE (val);
13202 rtx part;
13203
13204 if (mode == VOIDmode)
13205 mode = DImode;
13206
13207 part = gen_highpart_mode (SImode, mode, val);
13208
13209 gcc_assert (CONST_INT_P (part));
13210
13211 if (!const_ok_for_arm (INTVAL (part)))
13212 return false;
13213
13214 part = gen_lowpart (SImode, val);
13215
13216 gcc_assert (CONST_INT_P (part));
13217
13218 if (!const_ok_for_arm (INTVAL (part)))
13219 return false;
13220
13221 return true;
13222 }
13223
13224 /* Scan INSN and note any of its operands that need fixing.
13225 If DO_PUSHES is false we do not actually push any of the fixups
13226 needed. */
13227 static void
13228 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
13229 {
13230 int opno;
13231
13232 extract_insn (insn);
13233
13234 if (!constrain_operands (1))
13235 fatal_insn_not_found (insn);
13236
13237 if (recog_data.n_alternatives == 0)
13238 return;
13239
13240 /* Fill in recog_op_alt with information about the constraints of
13241 this insn. */
13242 preprocess_constraints ();
13243
13244 for (opno = 0; opno < recog_data.n_operands; opno++)
13245 {
13246 /* Things we need to fix can only occur in inputs. */
13247 if (recog_data.operand_type[opno] != OP_IN)
13248 continue;
13249
13250 /* If this alternative is a memory reference, then any mention
13251 of constants in this alternative is really to fool reload
13252 into allowing us to accept one there. We need to fix them up
13253 now so that we output the right code. */
13254 if (recog_op_alt[opno][which_alternative].memory_ok)
13255 {
13256 rtx op = recog_data.operand[opno];
13257
13258 if (CONSTANT_P (op))
13259 {
13260 if (do_pushes)
13261 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
13262 recog_data.operand_mode[opno], op);
13263 }
13264 else if (MEM_P (op)
13265 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
13266 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
13267 {
13268 if (do_pushes)
13269 {
13270 rtx cop = avoid_constant_pool_reference (op);
13271
13272 /* Casting the address of something to a mode narrower
13273 than a word can cause avoid_constant_pool_reference()
13274 to return the pool reference itself. That's no good to
13275 us here. Let's just hope that we can use the
13276 constant pool value directly. */
13277 if (op == cop)
13278 cop = get_pool_constant (XEXP (op, 0));
13279
13280 push_minipool_fix (insn, address,
13281 recog_data.operand_loc[opno],
13282 recog_data.operand_mode[opno], cop);
13283 }
13284
13285 }
13286 }
13287 }
13288
13289 return;
13290 }
13291
13292 /* Convert instructions to their cc-clobbering variant if possible, since
13293 that allows us to use smaller encodings. */
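/* For instance, a plain (set (reg r0) (plus (reg r0) (reg r1))) is
   rewritten as a PARALLEL of that SET and (clobber (reg CC)), so it
   can later match the flag-setting pattern and be emitted as the
   16-bit "adds" encoding instead of the 32-bit "add.w".  */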
13294
13295 static void
13296 thumb2_reorg (void)
13297 {
13298 basic_block bb;
13299 regset_head live;
13300
13301 INIT_REG_SET (&live);
13302
13303 /* We are freeing block_for_insn in the toplev to keep compatibility
13304 with old MDEP_REORGS that are not CFG based. Recompute it now. */
13305 compute_bb_for_insn ();
13306 df_analyze ();
13307
13308 FOR_EACH_BB (bb)
13309 {
13310 rtx insn;
13311
13312 COPY_REG_SET (&live, DF_LR_OUT (bb));
13313 df_simulate_initialize_backwards (bb, &live);
13314 FOR_BB_INSNS_REVERSE (bb, insn)
13315 {
13316 if (NONJUMP_INSN_P (insn)
13317 && !REGNO_REG_SET_P (&live, CC_REGNUM)
13318 && GET_CODE (PATTERN (insn)) == SET)
13319 {
13320 enum {SKIP, CONV, SWAP_CONV} action = SKIP;
13321 rtx pat = PATTERN (insn);
13322 rtx dst = XEXP (pat, 0);
13323 rtx src = XEXP (pat, 1);
13324 rtx op0 = NULL_RTX, op1 = NULL_RTX;
13325
13326 if (!OBJECT_P (src))
13327 op0 = XEXP (src, 0);
13328
13329 if (BINARY_P (src))
13330 op1 = XEXP (src, 1);
13331
13332 if (low_register_operand (dst, SImode))
13333 {
13334 switch (GET_CODE (src))
13335 {
13336 case PLUS:
13337 /* Adding two registers and storing the result
13338 in the first source is already a 16-bit
13339 operation. */
13340 if (rtx_equal_p (dst, op0)
13341 && register_operand (op1, SImode))
13342 break;
13343
13344 if (low_register_operand (op0, SImode))
13345 {
13346 /* ADDS <Rd>,<Rn>,<Rm> */
13347 if (low_register_operand (op1, SImode))
13348 action = CONV;
13349 /* ADDS <Rdn>,#<imm8> */
13350 /* SUBS <Rdn>,#<imm8> */
13351 else if (rtx_equal_p (dst, op0)
13352 && CONST_INT_P (op1)
13353 && IN_RANGE (INTVAL (op1), -255, 255))
13354 action = CONV;
13355 /* ADDS <Rd>,<Rn>,#<imm3> */
13356 /* SUBS <Rd>,<Rn>,#<imm3> */
13357 else if (CONST_INT_P (op1)
13358 && IN_RANGE (INTVAL (op1), -7, 7))
13359 action = CONV;
13360 }
13361 break;
13362
13363 case MINUS:
13364 /* RSBS <Rd>,<Rn>,#0
13365 Not handled here: see NEG below. */
13366 /* SUBS <Rd>,<Rn>,#<imm3>
13367 SUBS <Rdn>,#<imm8>
13368 Not handled here: see PLUS above. */
13369 /* SUBS <Rd>,<Rn>,<Rm> */
13370 if (low_register_operand (op0, SImode)
13371 && low_register_operand (op1, SImode))
13372 action = CONV;
13373 break;
13374
13375 case MULT:
13376 /* MULS <Rdm>,<Rn>,<Rdm>
13377 As an exception to the rule, this is only used
13378 when optimizing for size since MULS is slow on all
13379 known implementations. We do not even want to use
13380 MULS in cold code, if optimizing for speed, so we
13381 test the global flag here. */
13382 if (!optimize_size)
13383 break;
13384 /* else fall through. */
13385 case AND:
13386 case IOR:
13387 case XOR:
13388 /* ANDS <Rdn>,<Rm> */
13389 if (rtx_equal_p (dst, op0)
13390 && low_register_operand (op1, SImode))
13391 action = CONV;
13392 else if (rtx_equal_p (dst, op1)
13393 && low_register_operand (op0, SImode))
13394 action = SWAP_CONV;
13395 break;
13396
13397 case ASHIFTRT:
13398 case ASHIFT:
13399 case LSHIFTRT:
13400 /* ASRS <Rdn>,<Rm> */
13401 /* LSRS <Rdn>,<Rm> */
13402 /* LSLS <Rdn>,<Rm> */
13403 if (rtx_equal_p (dst, op0)
13404 && low_register_operand (op1, SImode))
13405 action = CONV;
13406 /* ASRS <Rd>,<Rm>,#<imm5> */
13407 /* LSRS <Rd>,<Rm>,#<imm5> */
13408 /* LSLS <Rd>,<Rm>,#<imm5> */
13409 else if (low_register_operand (op0, SImode)
13410 && CONST_INT_P (op1)
13411 && IN_RANGE (INTVAL (op1), 0, 31))
13412 action = CONV;
13413 break;
13414
13415 case ROTATERT:
13416 /* RORS <Rdn>,<Rm> */
13417 if (rtx_equal_p (dst, op0)
13418 && low_register_operand (op1, SImode))
13419 action = CONV;
13420 break;
13421
13422 case NOT:
13423 case NEG:
13424 /* MVNS <Rd>,<Rm> */
13425 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
13426 if (low_register_operand (op0, SImode))
13427 action = CONV;
13428 break;
13429
13430 case CONST_INT:
13431 /* MOVS <Rd>,#<imm8> */
13432 if (CONST_INT_P (src)
13433 && IN_RANGE (INTVAL (src), 0, 255))
13434 action = CONV;
13435 break;
13436
13437 case REG:
13438 /* MOVS and MOV<c> with registers have different
13439 encodings, so are not relevant here. */
13440 break;
13441
13442 default:
13443 break;
13444 }
13445 }
13446
13447 if (action != SKIP)
13448 {
13449 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13450 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13451 rtvec vec;
13452
13453 if (action == SWAP_CONV)
13454 {
13455 src = copy_rtx (src);
13456 XEXP (src, 0) = op1;
13457 XEXP (src, 1) = op0;
13458 pat = gen_rtx_SET (VOIDmode, dst, src);
13459 vec = gen_rtvec (2, pat, clobber);
13460 }
13461 else /* action == CONV */
13462 vec = gen_rtvec (2, pat, clobber);
13463
13464 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13465 INSN_CODE (insn) = -1;
13466 }
13467 }
13468
13469 if (NONDEBUG_INSN_P (insn))
13470 df_simulate_one_insn_backwards (bb, insn, &live);
13471 }
13472 }
13473
13474 CLEAR_REG_SET (&live);
13475 }
13476
13477 /* GCC puts the pool in the wrong place for ARM, since we can only
13478 load addresses a limited distance around the pc. We do some
13479 special munging to move the constant pool values to the correct
13480 point in the code. */
13481 static void
13482 arm_reorg (void)
13483 {
13484 rtx insn;
13485 HOST_WIDE_INT address = 0;
13486 Mfix * fix;
13487
13488 if (TARGET_THUMB2)
13489 thumb2_reorg ();
13490
13491 /* Ensure all insns that must be split have been split at this point.
13492 Otherwise, the pool placement code below may compute incorrect
13493 insn lengths. Note that when optimizing, all insns have already
13494 been split at this point. */
13495 if (!optimize)
13496 split_all_insns_noflow ();
13497
13498 minipool_fix_head = minipool_fix_tail = NULL;
13499
13500 /* The first insn must always be a note, or the code below won't
13501 scan it properly. */
13502 insn = get_insns ();
13503 gcc_assert (NOTE_P (insn));
13504 minipool_pad = 0;
13505
13506 /* Scan all the insns and record the operands that will need fixing. */
13507 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
13508 {
13509 if (BARRIER_P (insn))
13510 push_minipool_barrier (insn, address);
13511 else if (INSN_P (insn))
13512 {
13513 rtx table;
13514
13515 note_invalid_constants (insn, address, true);
13516 address += get_attr_length (insn);
13517
13518 /* If the insn is a vector jump, add the size of the table
13519 and skip the table. */
13520 if ((table = is_jump_table (insn)) != NULL)
13521 {
13522 address += get_jump_table_size (table);
13523 insn = table;
13524 }
13525 }
13526 else if (LABEL_P (insn))
13527 /* Add the worst-case padding due to alignment. We don't add
13528 the _current_ padding because the minipool insertions
13529 themselves might change it. */
13530 address += get_label_padding (insn);
13531 }
13532
13533 fix = minipool_fix_head;
13534
13535 /* Now scan the fixups and perform the required changes. */
13536 while (fix)
13537 {
13538 Mfix * ftmp;
13539 Mfix * fdel;
13540 Mfix * last_added_fix;
13541 Mfix * last_barrier = NULL;
13542 Mfix * this_fix;
13543
13544 /* Skip any further barriers before the next fix. */
13545 while (fix && BARRIER_P (fix->insn))
13546 fix = fix->next;
13547
13548 /* No more fixes. */
13549 if (fix == NULL)
13550 break;
13551
13552 last_added_fix = NULL;
13553
13554 for (ftmp = fix; ftmp; ftmp = ftmp->next)
13555 {
13556 if (BARRIER_P (ftmp->insn))
13557 {
13558 if (ftmp->address >= minipool_vector_head->max_address)
13559 break;
13560
13561 last_barrier = ftmp;
13562 }
13563 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
13564 break;
13565
13566 last_added_fix = ftmp; /* Keep track of the last fix added. */
13567 }
13568
13569 /* If we found a barrier, drop back to that; any fixes that we
13570 could have reached but come after the barrier will now go in
13571 the next mini-pool. */
13572 if (last_barrier != NULL)
13573 {
13574 /* Reduce the refcount for those fixes that won't go into this
13575 pool after all. */
13576 for (fdel = last_barrier->next;
13577 fdel && fdel != ftmp;
13578 fdel = fdel->next)
13579 {
13580 fdel->minipool->refcount--;
13581 fdel->minipool = NULL;
13582 }
13583
13584 ftmp = last_barrier;
13585 }
13586 else
13587 {
13588 /* ftmp is the first fix that we can't fit into this pool and
13589 there are no natural barriers that we could use. Insert a
13590 new barrier in the code somewhere between the previous
13591 fix and this one, and arrange to jump around it. */
13592 HOST_WIDE_INT max_address;
13593
13594 /* The last item on the list of fixes must be a barrier, so
13595 we can never run off the end of the list of fixes without
13596 last_barrier being set. */
13597 gcc_assert (ftmp);
13598
13599 max_address = minipool_vector_head->max_address;
13600 /* Check that there isn't another fix that is in range that
13601 we couldn't fit into this pool because the pool was
13602 already too large: we need to put the pool before such an
13603 instruction. The pool itself may come just after the
13604 fix because create_fix_barrier also allows space for a
13605 jump instruction. */
13606 if (ftmp->address < max_address)
13607 max_address = ftmp->address + 1;
13608
13609 last_barrier = create_fix_barrier (last_added_fix, max_address);
13610 }
13611
13612 assign_minipool_offsets (last_barrier);
13613
13614 while (ftmp)
13615 {
13616 if (!BARRIER_P (ftmp->insn)
13617 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
13618 == NULL))
13619 break;
13620
13621 ftmp = ftmp->next;
13622 }
13623
13624 /* Scan over the fixes we have identified for this pool, fixing them
13625 up and adding the constants to the pool itself. */
13626 for (this_fix = fix; this_fix && ftmp != this_fix;
13627 this_fix = this_fix->next)
13628 if (!BARRIER_P (this_fix->insn))
13629 {
13630 rtx addr
13631 = plus_constant (Pmode,
13632 gen_rtx_LABEL_REF (VOIDmode,
13633 minipool_vector_label),
13634 this_fix->minipool->offset);
13635 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
13636 }
13637
13638 dump_minipool (last_barrier->insn);
13639 fix = ftmp;
13640 }
13641
13642 /* From now on we must synthesize any constants that we can't handle
13643 directly. This can happen if the RTL gets split during final
13644 instruction generation. */
13645 after_arm_reorg = 1;
13646
13647 /* Free the minipool memory. */
13648 obstack_free (&minipool_obstack, minipool_startobj);
13649 }
13650 \f
13651 /* Routines to output assembly language. */
13652
13653 /* If the rtx is the correct value then return the string of the number.
13654 In this way we can ensure that valid double constants are generated even
13655 when cross compiling. */
13656 const char *
13657 fp_immediate_constant (rtx x)
13658 {
13659 REAL_VALUE_TYPE r;
13660
13661 if (!fp_consts_inited)
13662 init_fp_table ();
13663
13664 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13665
13666 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
13667 return "0";
13668 }
13669
13670 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
13671 static const char *
13672 fp_const_from_val (REAL_VALUE_TYPE *r)
13673 {
13674 if (!fp_consts_inited)
13675 init_fp_table ();
13676
13677 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
13678 return "0";
13679 }
13680
13681 /* OPERANDS[0] is the entire list of insns that constitute the pop,
13682 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
13683 is in the list, UPDATE is true iff the list contains explicit
13684 update of base register. */
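/* For example, with SP as the base register and UPDATE set this
   emits something like "pop {r4, r5, pc}"; with another base
   register it falls back to an LDM form such as
   "ldmia r7, {r4, r5, pc}".  */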
13685 void
13686 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
13687 bool update)
13688 {
13689 int i;
13690 char pattern[100];
13691 int offset;
13692 const char *conditional;
13693 int num_saves = XVECLEN (operands[0], 0);
13694 unsigned int regno;
13695 unsigned int regno_base = REGNO (operands[1]);
13696
13697 offset = 0;
13698 offset += update ? 1 : 0;
13699 offset += return_pc ? 1 : 0;
13700
13701 /* Is the base register in the list? */
13702 for (i = offset; i < num_saves; i++)
13703 {
13704 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
13705 /* If SP is in the list, then the base register must be SP. */
13706 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
13707 /* If base register is in the list, there must be no explicit update. */
13708 if (regno == regno_base)
13709 gcc_assert (!update);
13710 }
13711
13712 conditional = reverse ? "%?%D0" : "%?%d0";
13713 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
13714 {
13715 /* Output pop (not ldmfd) because it has a shorter encoding. */
13716 gcc_assert (update);
13717 sprintf (pattern, "pop%s\t{", conditional);
13718 }
13719 else
13720 {
13721 /* Output ldmfd when the base register is SP, otherwise output ldmia.
13722 It's just a convention, their semantics are identical. */
13723 if (regno_base == SP_REGNUM)
13724 sprintf (pattern, "ldm%sfd\t", conditional);
13725 else if (TARGET_UNIFIED_ASM)
13726 sprintf (pattern, "ldmia%s\t", conditional);
13727 else
13728 sprintf (pattern, "ldm%sia\t", conditional);
13729
13730 strcat (pattern, reg_names[regno_base]);
13731 if (update)
13732 strcat (pattern, "!, {");
13733 else
13734 strcat (pattern, ", {");
13735 }
13736
13737 /* Output the first destination register. */
13738 strcat (pattern,
13739 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
13740
13741 /* Output the rest of the destination registers. */
13742 for (i = offset + 1; i < num_saves; i++)
13743 {
13744 strcat (pattern, ", ");
13745 strcat (pattern,
13746 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
13747 }
13748
13749 strcat (pattern, "}");
13750
13751 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
13752 strcat (pattern, "^");
13753
13754 output_asm_insn (pattern, &cond);
13755 }
13756
13757
13758 /* Output the assembly for a store multiple. */
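/* For instance, a push of three D registers starting at d8 comes out
   roughly as "fstmfdd sp!, {d8, d9, d10}".  */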
13759
13760 const char *
13761 vfp_output_fstmd (rtx * operands)
13762 {
13763 char pattern[100];
13764 int p;
13765 int base;
13766 int i;
13767
13768 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
13769 p = strlen (pattern);
13770
13771 gcc_assert (REG_P (operands[1]));
13772
13773 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
13774 for (i = 1; i < XVECLEN (operands[2], 0); i++)
13775 {
13776 p += sprintf (&pattern[p], ", d%d", base + i);
13777 }
13778 strcpy (&pattern[p], "}");
13779
13780 output_asm_insn (pattern, operands);
13781 return "";
13782 }
13783
13784
13785 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
13786 number of bytes pushed. */
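/* The insn built here is a PARALLEL of a PRE_MODIFY block store and
   USEs of the pushed registers, carrying a REG_FRAME_RELATED_EXPR
   note that describes each individual store for the unwinder.  */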
13787
13788 static int
13789 vfp_emit_fstmd (int base_reg, int count)
13790 {
13791 rtx par;
13792 rtx dwarf;
13793 rtx tmp, reg;
13794 int i;
13795
13796 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
13797 register pairs are stored by a store multiple insn. We avoid this
13798 by pushing an extra pair. */
13799 if (count == 2 && !arm_arch6)
13800 {
13801 if (base_reg == LAST_VFP_REGNUM - 3)
13802 base_reg -= 2;
13803 count++;
13804 }
13805
13806 /* FSTMD may not store more than 16 doubleword registers at once. Split
13807 larger stores into multiple parts (up to a maximum of two, in
13808 practice). */
13809 if (count > 16)
13810 {
13811 int saved;
13812 /* NOTE: base_reg is an internal register number, so each D register
13813 counts as 2. */
13814 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
13815 saved += vfp_emit_fstmd (base_reg, 16);
13816 return saved;
13817 }
13818
13819 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
13820 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
13821
13822 reg = gen_rtx_REG (DFmode, base_reg);
13823 base_reg += 2;
13824
13825 XVECEXP (par, 0, 0)
13826 = gen_rtx_SET (VOIDmode,
13827 gen_frame_mem
13828 (BLKmode,
13829 gen_rtx_PRE_MODIFY (Pmode,
13830 stack_pointer_rtx,
13831 plus_constant
13832 (Pmode, stack_pointer_rtx,
13833 - (count * 8)))
13834 ),
13835 gen_rtx_UNSPEC (BLKmode,
13836 gen_rtvec (1, reg),
13837 UNSPEC_PUSH_MULT));
13838
13839 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13840 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
13841 RTX_FRAME_RELATED_P (tmp) = 1;
13842 XVECEXP (dwarf, 0, 0) = tmp;
13843
13844 tmp = gen_rtx_SET (VOIDmode,
13845 gen_frame_mem (DFmode, stack_pointer_rtx),
13846 reg);
13847 RTX_FRAME_RELATED_P (tmp) = 1;
13848 XVECEXP (dwarf, 0, 1) = tmp;
13849
13850 for (i = 1; i < count; i++)
13851 {
13852 reg = gen_rtx_REG (DFmode, base_reg);
13853 base_reg += 2;
13854 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
13855
13856 tmp = gen_rtx_SET (VOIDmode,
13857 gen_frame_mem (DFmode,
13858 plus_constant (Pmode,
13859 stack_pointer_rtx,
13860 i * 8)),
13861 reg);
13862 RTX_FRAME_RELATED_P (tmp) = 1;
13863 XVECEXP (dwarf, 0, i + 1) = tmp;
13864 }
13865
13866 par = emit_insn (par);
13867 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
13868 RTX_FRAME_RELATED_P (par) = 1;
13869
13870 return count * 8;
13871 }
13872
13873 /* Emit a call instruction with pattern PAT. ADDR is the address of
13874 the call target. */
13875
13876 void
13877 arm_emit_call_insn (rtx pat, rtx addr)
13878 {
13879 rtx insn;
13880
13881 insn = emit_call_insn (pat);
13882
13883 /* The PIC register is live on entry to VxWorks PIC PLT entries.
13884 If the call might use such an entry, add a use of the PIC register
13885 to the instruction's CALL_INSN_FUNCTION_USAGE. */
13886 if (TARGET_VXWORKS_RTP
13887 && flag_pic
13888 && GET_CODE (addr) == SYMBOL_REF
13889 && (SYMBOL_REF_DECL (addr)
13890 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
13891 : !SYMBOL_REF_LOCAL_P (addr)))
13892 {
13893 require_pic_register ();
13894 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
13895 }
13896 }
13897
13898 /* Output a 'call' insn. */
13899 const char *
13900 output_call (rtx *operands)
13901 {
13902 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
13903
13904 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
13905 if (REGNO (operands[0]) == LR_REGNUM)
13906 {
13907 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
13908 output_asm_insn ("mov%?\t%0, %|lr", operands);
13909 }
13910
13911 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13912
13913 if (TARGET_INTERWORK || arm_arch4t)
13914 output_asm_insn ("bx%?\t%0", operands);
13915 else
13916 output_asm_insn ("mov%?\t%|pc, %0", operands);
13917
13918 return "";
13919 }
13920
13921 /* Output a 'call' insn that is a reference in memory. This is
13922 disabled on ARMv5, where a blx is preferred instead, because
13923 otherwise there's a significant performance overhead. */
13924 const char *
13925 output_call_mem (rtx *operands)
13926 {
13927 gcc_assert (!arm_arch5);
13928 if (TARGET_INTERWORK)
13929 {
13930 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13931 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13932 output_asm_insn ("bx%?\t%|ip", operands);
13933 }
13934 else if (regno_use_in (LR_REGNUM, operands[0]))
13935 {
13936 /* LR is used in the memory address. We load the address in the
13937 first instruction. It's safe to use IP as the target of the
13938 load since the call will kill it anyway. */
13939 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13940 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13941 if (arm_arch4t)
13942 output_asm_insn ("bx%?\t%|ip", operands);
13943 else
13944 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
13945 }
13946 else
13947 {
13948 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13949 output_asm_insn ("ldr%?\t%|pc, %0", operands);
13950 }
13951
13952 return "";
13953 }
13954
13955
13956 /* Output a move from arm registers to arm registers of a long double
13957 OPERANDS[0] is the destination.
13958 OPERANDS[1] is the source. */
13959 const char *
13960 output_mov_long_double_arm_from_arm (rtx *operands)
13961 {
13962 /* We have to be careful here because the two might overlap. */
13963 int dest_start = REGNO (operands[0]);
13964 int src_start = REGNO (operands[1]);
13965 rtx ops[2];
13966 int i;
13967
13968 if (dest_start < src_start)
13969 {
13970 for (i = 0; i < 3; i++)
13971 {
13972 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13973 ops[1] = gen_rtx_REG (SImode, src_start + i);
13974 output_asm_insn ("mov%?\t%0, %1", ops);
13975 }
13976 }
13977 else
13978 {
13979 for (i = 2; i >= 0; i--)
13980 {
13981 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13982 ops[1] = gen_rtx_REG (SImode, src_start + i);
13983 output_asm_insn ("mov%?\t%0, %1", ops);
13984 }
13985 }
13986
13987 return "";
13988 }
13989
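/* Emit RTL to set DEST to SRC as a movw/movt style pair: for an
   immediate, set the low 16 bits and then, if they are non-zero,
   insert the high 16 bits; for a symbolic SRC, emit HIGH followed by
   LO_SUM.  */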
13990 void
13991 arm_emit_movpair (rtx dest, rtx src)
13992 {
13993 /* If the src is an immediate, simplify it. */
13994 if (CONST_INT_P (src))
13995 {
13996 HOST_WIDE_INT val = INTVAL (src);
13997 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
13998 if ((val >> 16) & 0x0000ffff)
13999 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
14000 GEN_INT (16)),
14001 GEN_INT ((val >> 16) & 0x0000ffff));
14002 return;
14003 }
14004 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
14005 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
14006 }
14007
14008 /* Output a move between double words. It must be REG<-MEM
14009 or MEM<-REG. */
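/* When EMIT is false nothing is output; instead *COUNT is set to the
   number of instructions (1 or 2) that the move would need, which is
   used when computing instruction lengths.  */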
14010 const char *
14011 output_move_double (rtx *operands, bool emit, int *count)
14012 {
14013 enum rtx_code code0 = GET_CODE (operands[0]);
14014 enum rtx_code code1 = GET_CODE (operands[1]);
14015 rtx otherops[3];
14016 if (count)
14017 *count = 1;
14018
14019 /* The only case when this might happen is when
14020 you are looking at the length of a DImode instruction
14021 that has an invalid constant in it. */
14022 if (code0 == REG && code1 != MEM)
14023 {
14024 gcc_assert (!emit);
14025 *count = 2;
14026 return "";
14027 }
14028
14029 if (code0 == REG)
14030 {
14031 unsigned int reg0 = REGNO (operands[0]);
14032
14033 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
14034
14035 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
14036
14037 switch (GET_CODE (XEXP (operands[1], 0)))
14038 {
14039 case REG:
14040
14041 if (emit)
14042 {
14043 if (TARGET_LDRD
14044 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
14045 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
14046 else
14047 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14048 }
14049 break;
14050
14051 case PRE_INC:
14052 gcc_assert (TARGET_LDRD);
14053 if (emit)
14054 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
14055 break;
14056
14057 case PRE_DEC:
14058 if (emit)
14059 {
14060 if (TARGET_LDRD)
14061 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
14062 else
14063 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
14064 }
14065 break;
14066
14067 case POST_INC:
14068 if (emit)
14069 {
14070 if (TARGET_LDRD)
14071 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
14072 else
14073 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
14074 }
14075 break;
14076
14077 case POST_DEC:
14078 gcc_assert (TARGET_LDRD);
14079 if (emit)
14080 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
14081 break;
14082
14083 case PRE_MODIFY:
14084 case POST_MODIFY:
14085 /* Autoincrement addressing modes should never have overlapping
14086 base and destination registers, and overlapping index registers
14087 are already prohibited, so this doesn't need to worry about
14088 fix_cm3_ldrd. */
14089 otherops[0] = operands[0];
14090 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
14091 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
14092
14093 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
14094 {
14095 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
14096 {
14097 /* Registers overlap so split out the increment. */
14098 if (emit)
14099 {
14100 output_asm_insn ("add%?\t%1, %1, %2", otherops);
14101 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
14102 }
14103 if (count)
14104 *count = 2;
14105 }
14106 else
14107 {
14108 /* Use a single insn if we can.
14109 FIXME: IWMMXT allows offsets larger than ldrd can
14110 handle, fix these up with a pair of ldr. */
14111 if (TARGET_THUMB2
14112 || !CONST_INT_P (otherops[2])
14113 || (INTVAL (otherops[2]) > -256
14114 && INTVAL (otherops[2]) < 256))
14115 {
14116 if (emit)
14117 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
14118 }
14119 else
14120 {
14121 if (emit)
14122 {
14123 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
14124 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14125 }
14126 if (count)
14127 *count = 2;
14128
14129 }
14130 }
14131 }
14132 else
14133 {
14134 /* Use a single insn if we can.
14135 FIXME: IWMMXT allows offsets larger than ldrd can handle,
14136 fix these up with a pair of ldr. */
14137 if (TARGET_THUMB2
14138 || !CONST_INT_P (otherops[2])
14139 || (INTVAL (otherops[2]) > -256
14140 && INTVAL (otherops[2]) < 256))
14141 {
14142 if (emit)
14143 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
14144 }
14145 else
14146 {
14147 if (emit)
14148 {
14149 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14150 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
14151 }
14152 if (count)
14153 *count = 2;
14154 }
14155 }
14156 break;
14157
14158 case LABEL_REF:
14159 case CONST:
14160 /* We might be able to use ldrd %0, %1 here. However the range is
14161 different to ldr/adr, and it is broken on some ARMv7-M
14162 implementations. */
14163 /* Use the second register of the pair to avoid problematic
14164 overlap. */
14165 otherops[1] = operands[1];
14166 if (emit)
14167 output_asm_insn ("adr%?\t%0, %1", otherops);
14168 operands[1] = otherops[0];
14169 if (emit)
14170 {
14171 if (TARGET_LDRD)
14172 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14173 else
14174 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
14175 }
14176
14177 if (count)
14178 *count = 2;
14179 break;
14180
14181 /* ??? This needs checking for thumb2. */
14182 default:
14183 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
14184 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
14185 {
14186 otherops[0] = operands[0];
14187 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
14188 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
14189
14190 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
14191 {
14192 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
14193 {
14194 switch ((int) INTVAL (otherops[2]))
14195 {
14196 case -8:
14197 if (emit)
14198 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
14199 return "";
14200 case -4:
14201 if (TARGET_THUMB2)
14202 break;
14203 if (emit)
14204 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
14205 return "";
14206 case 4:
14207 if (TARGET_THUMB2)
14208 break;
14209 if (emit)
14210 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
14211 return "";
14212 }
14213 }
14214 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
14215 operands[1] = otherops[0];
14216 if (TARGET_LDRD
14217 && (REG_P (otherops[2])
14218 || TARGET_THUMB2
14219 || (CONST_INT_P (otherops[2])
14220 && INTVAL (otherops[2]) > -256
14221 && INTVAL (otherops[2]) < 256)))
14222 {
14223 if (reg_overlap_mentioned_p (operands[0],
14224 otherops[2]))
14225 {
14226 rtx tmp;
14227 /* Swap base and index registers over to
14228 avoid a conflict. */
14229 tmp = otherops[1];
14230 otherops[1] = otherops[2];
14231 otherops[2] = tmp;
14232 }
14233 /* If both registers conflict, it will usually
14234 have been fixed by a splitter. */
14235 if (reg_overlap_mentioned_p (operands[0], otherops[2])
14236 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
14237 {
14238 if (emit)
14239 {
14240 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14241 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14242 }
14243 if (count)
14244 *count = 2;
14245 }
14246 else
14247 {
14248 otherops[0] = operands[0];
14249 if (emit)
14250 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
14251 }
14252 return "";
14253 }
14254
14255 if (CONST_INT_P (otherops[2]))
14256 {
14257 if (emit)
14258 {
14259 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
14260 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
14261 else
14262 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14263 }
14264 }
14265 else
14266 {
14267 if (emit)
14268 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14269 }
14270 }
14271 else
14272 {
14273 if (emit)
14274 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
14275 }
14276
14277 if (count)
14278 *count = 2;
14279
14280 if (TARGET_LDRD)
14281 return "ldr%(d%)\t%0, [%1]";
14282
14283 return "ldm%(ia%)\t%1, %M0";
14284 }
14285 else
14286 {
14287 otherops[1] = adjust_address (operands[1], SImode, 4);
14288 /* Take care of overlapping base/data reg. */
14289 if (reg_mentioned_p (operands[0], operands[1]))
14290 {
14291 if (emit)
14292 {
14293 output_asm_insn ("ldr%?\t%0, %1", otherops);
14294 output_asm_insn ("ldr%?\t%0, %1", operands);
14295 }
14296 if (count)
14297 *count = 2;
14298
14299 }
14300 else
14301 {
14302 if (emit)
14303 {
14304 output_asm_insn ("ldr%?\t%0, %1", operands);
14305 output_asm_insn ("ldr%?\t%0, %1", otherops);
14306 }
14307 if (count)
14308 *count = 2;
14309 }
14310 }
14311 }
14312 }
14313 else
14314 {
14315 /* Constraints should ensure this. */
14316 gcc_assert (code0 == MEM && code1 == REG);
14317 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
14318
14319 switch (GET_CODE (XEXP (operands[0], 0)))
14320 {
14321 case REG:
14322 if (emit)
14323 {
14324 if (TARGET_LDRD)
14325 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
14326 else
14327 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14328 }
14329 break;
14330
14331 case PRE_INC:
14332 gcc_assert (TARGET_LDRD);
14333 if (emit)
14334 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
14335 break;
14336
14337 case PRE_DEC:
14338 if (emit)
14339 {
14340 if (TARGET_LDRD)
14341 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
14342 else
14343 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
14344 }
14345 break;
14346
14347 case POST_INC:
14348 if (emit)
14349 {
14350 if (TARGET_LDRD)
14351 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
14352 else
14353 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
14354 }
14355 break;
14356
14357 case POST_DEC:
14358 gcc_assert (TARGET_LDRD);
14359 if (emit)
14360 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
14361 break;
14362
14363 case PRE_MODIFY:
14364 case POST_MODIFY:
14365 otherops[0] = operands[1];
14366 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
14367 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
14368
14369 /* IWMMXT allows offsets larger than ldrd can handle,
14370 fix these up with a pair of ldr. */
14371 if (!TARGET_THUMB2
14372 && CONST_INT_P (otherops[2])
14373 && (INTVAL(otherops[2]) <= -256
14374 || INTVAL(otherops[2]) >= 256))
14375 {
14376 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14377 {
14378 if (emit)
14379 {
14380 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
14381 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14382 }
14383 if (count)
14384 *count = 2;
14385 }
14386 else
14387 {
14388 if (emit)
14389 {
14390 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14391 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
14392 }
14393 if (count)
14394 *count = 2;
14395 }
14396 }
14397 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14398 {
14399 if (emit)
14400 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
14401 }
14402 else
14403 {
14404 if (emit)
14405 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
14406 }
14407 break;
14408
14409 case PLUS:
14410 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
14411 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
14412 {
14413 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
14414 {
14415 case -8:
14416 if (emit)
14417 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
14418 return "";
14419
14420 case -4:
14421 if (TARGET_THUMB2)
14422 break;
14423 if (emit)
14424 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
14425 return "";
14426
14427 case 4:
14428 if (TARGET_THUMB2)
14429 break;
14430 if (emit)
14431 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
14432 return "";
14433 }
14434 }
14435 if (TARGET_LDRD
14436 && (REG_P (otherops[2])
14437 || TARGET_THUMB2
14438 || (CONST_INT_P (otherops[2])
14439 && INTVAL (otherops[2]) > -256
14440 && INTVAL (otherops[2]) < 256)))
14441 {
14442 otherops[0] = operands[1];
14443 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
14444 if (emit)
14445 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
14446 return "";
14447 }
14448 /* Fall through */
14449
14450 default:
14451 otherops[0] = adjust_address (operands[0], SImode, 4);
14452 otherops[1] = operands[1];
14453 if (emit)
14454 {
14455 output_asm_insn ("str%?\t%1, %0", operands);
14456 output_asm_insn ("str%?\t%H1, %0", otherops);
14457 }
14458 if (count)
14459 *count = 2;
14460 }
14461 }
14462
14463 return "";
14464 }
14465
14466 /* Output a move, load or store for quad-word vectors in ARM registers. Only
14467 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
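/* A register-to-register move is expanded as four SImode "mov"
   instructions, ordered low-to-high or high-to-low so that the
   destination never overwrites a source register it still needs.  */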
14468
14469 const char *
14470 output_move_quad (rtx *operands)
14471 {
14472 if (REG_P (operands[0]))
14473 {
14474 /* Load, or reg->reg move. */
14475
14476 if (MEM_P (operands[1]))
14477 {
14478 switch (GET_CODE (XEXP (operands[1], 0)))
14479 {
14480 case REG:
14481 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14482 break;
14483
14484 case LABEL_REF:
14485 case CONST:
14486 output_asm_insn ("adr%?\t%0, %1", operands);
14487 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
14488 break;
14489
14490 default:
14491 gcc_unreachable ();
14492 }
14493 }
14494 else
14495 {
14496 rtx ops[2];
14497 int dest, src, i;
14498
14499 gcc_assert (REG_P (operands[1]));
14500
14501 dest = REGNO (operands[0]);
14502 src = REGNO (operands[1]);
14503
14504 /* This seems pretty dumb, but hopefully GCC won't try to do it
14505 very often. */
14506 if (dest < src)
14507 for (i = 0; i < 4; i++)
14508 {
14509 ops[0] = gen_rtx_REG (SImode, dest + i);
14510 ops[1] = gen_rtx_REG (SImode, src + i);
14511 output_asm_insn ("mov%?\t%0, %1", ops);
14512 }
14513 else
14514 for (i = 3; i >= 0; i--)
14515 {
14516 ops[0] = gen_rtx_REG (SImode, dest + i);
14517 ops[1] = gen_rtx_REG (SImode, src + i);
14518 output_asm_insn ("mov%?\t%0, %1", ops);
14519 }
14520 }
14521 }
14522 else
14523 {
14524 gcc_assert (MEM_P (operands[0]));
14525 gcc_assert (REG_P (operands[1]));
14526 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
14527
14528 switch (GET_CODE (XEXP (operands[0], 0)))
14529 {
14530 case REG:
14531 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14532 break;
14533
14534 default:
14535 gcc_unreachable ();
14536 }
14537 }
14538
14539 return "";
14540 }
14541
14542 /* Output a VFP load or store instruction. */
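/* For example, a DFmode load from a plain address comes out roughly
   as "fldd %P0, %1", while an SFmode store with a POST_INC address
   becomes something like "fstmias %0!, {%1}".  */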
14543
14544 const char *
14545 output_move_vfp (rtx *operands)
14546 {
14547 rtx reg, mem, addr, ops[2];
14548 int load = REG_P (operands[0]);
14549 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
14550 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
14551 const char *templ;
14552 char buff[50];
14553 enum machine_mode mode;
14554
14555 reg = operands[!load];
14556 mem = operands[load];
14557
14558 mode = GET_MODE (reg);
14559
14560 gcc_assert (REG_P (reg));
14561 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
14562 gcc_assert (mode == SFmode
14563 || mode == DFmode
14564 || mode == SImode
14565 || mode == DImode
14566 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
14567 gcc_assert (MEM_P (mem));
14568
14569 addr = XEXP (mem, 0);
14570
14571 switch (GET_CODE (addr))
14572 {
14573 case PRE_DEC:
14574 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
14575 ops[0] = XEXP (addr, 0);
14576 ops[1] = reg;
14577 break;
14578
14579 case POST_INC:
14580 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
14581 ops[0] = XEXP (addr, 0);
14582 ops[1] = reg;
14583 break;
14584
14585 default:
14586 templ = "f%s%c%%?\t%%%s0, %%1%s";
14587 ops[0] = reg;
14588 ops[1] = mem;
14589 break;
14590 }
14591
14592 sprintf (buff, templ,
14593 load ? "ld" : "st",
14594 dp ? 'd' : 's',
14595 dp ? "P" : "",
14596 integer_p ? "\t%@ int" : "");
14597 output_asm_insn (buff, ops);
14598
14599 return "";
14600 }
14601
14602 /* Output a Neon double-word or quad-word load or store, or a load
14603 or store for larger structure modes.
14604
14605 WARNING: The ordering of elements is weird in big-endian mode,
14606 because the EABI requires that vectors stored in memory appear
14607 as though they were stored by a VSTM instruction.
14608 GCC RTL defines element ordering based on in-memory order.
14609 This can be different from the architectural ordering of elements
14610 within a NEON register. The intrinsics defined in arm_neon.h use the
14611 NEON register element ordering, not the GCC RTL element ordering.
14612
14613 For example, the in-memory ordering of a big-endian quadword
14614 vector with 16-bit elements when stored from register pair {d0,d1}
14615 will be (lowest address first, d0[N] is NEON register element N):
14616
14617 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
14618
14619 When necessary, quadword registers (dN, dN+1) are moved to ARM
14620 registers from rN in the order:
14621
14622 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
14623
14624 So that STM/LDM can be used on vectors in ARM registers, and the
14625 same memory layout will result as if VSTM/VLDM were used.
14626
14627 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
14628 possible, which allows use of appropriate alignment tags.
14629 Note that the choice of "64" is independent of the actual vector
14630 element size; this size simply ensures that the behavior is
14631 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
14632
14633 Due to limitations of those instructions, use of VST1.64/VLD1.64
14634 is not possible if:
14635 - the address contains PRE_DEC, or
14636 - the mode refers to more than 4 double-word registers
14637
14638 In those cases, it would be possible to replace VSTM/VLDM by a
14639 sequence of instructions; this is not currently implemented since
14640 this is not certain to actually improve performance. */
14641
14642 const char *
14643 output_move_neon (rtx *operands)
14644 {
14645 rtx reg, mem, addr, ops[2];
14646 int regno, nregs, load = REG_P (operands[0]);
14647 const char *templ;
14648 char buff[50];
14649 enum machine_mode mode;
14650
14651 reg = operands[!load];
14652 mem = operands[load];
14653
14654 mode = GET_MODE (reg);
14655
14656 gcc_assert (REG_P (reg));
14657 regno = REGNO (reg);
14658 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
14659 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
14660 || NEON_REGNO_OK_FOR_QUAD (regno));
14661 gcc_assert (VALID_NEON_DREG_MODE (mode)
14662 || VALID_NEON_QREG_MODE (mode)
14663 || VALID_NEON_STRUCT_MODE (mode));
14664 gcc_assert (MEM_P (mem));
14665
14666 addr = XEXP (mem, 0);
14667
14668 /* Strip off const from addresses like (const (plus (...))). */
14669 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14670 addr = XEXP (addr, 0);
14671
14672 switch (GET_CODE (addr))
14673 {
14674 case POST_INC:
14675 /* We have to use vldm / vstm for too-large modes. */
14676 if (nregs > 4)
14677 {
14678 templ = "v%smia%%?\t%%0!, %%h1";
14679 ops[0] = XEXP (addr, 0);
14680 }
14681 else
14682 {
14683 templ = "v%s1.64\t%%h1, %%A0";
14684 ops[0] = mem;
14685 }
14686 ops[1] = reg;
14687 break;
14688
14689 case PRE_DEC:
14690 /* We have to use vldm / vstm in this case, since there is no
14691 pre-decrement form of the vld1 / vst1 instructions. */
14692 templ = "v%smdb%%?\t%%0!, %%h1";
14693 ops[0] = XEXP (addr, 0);
14694 ops[1] = reg;
14695 break;
14696
14697 case POST_MODIFY:
14698 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
14699 gcc_unreachable ();
14700
14701 case LABEL_REF:
14702 case PLUS:
14703 {
14704 int i;
14705 int overlap = -1;
14706 for (i = 0; i < nregs; i++)
14707 {
14708 /* We're only using DImode here because it's a convenient size. */
14709 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
14710 ops[1] = adjust_address (mem, DImode, 8 * i);
14711 if (reg_overlap_mentioned_p (ops[0], mem))
14712 {
14713 gcc_assert (overlap == -1);
14714 overlap = i;
14715 }
14716 else
14717 {
14718 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14719 output_asm_insn (buff, ops);
14720 }
14721 }
14722 if (overlap != -1)
14723 {
14724 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
14725 ops[1] = adjust_address (mem, SImode, 8 * overlap);
14726 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14727 output_asm_insn (buff, ops);
14728 }
14729
14730 return "";
14731 }
14732
14733 default:
14734 /* We have to use vldm / vstm for too-large modes. */
14735 if (nregs > 4)
14736 templ = "v%smia%%?\t%%m0, %%h1";
14737 else
14738 templ = "v%s1.64\t%%h1, %%A0";
14739
14740 ops[0] = mem;
14741 ops[1] = reg;
14742 }
14743
14744 sprintf (buff, templ, load ? "ld" : "st");
14745 output_asm_insn (buff, ops);
14746
14747 return "";
14748 }
14749
14750 /* Compute and return the length of neon_mov<mode>, where <mode> is
14751 one of VSTRUCT modes: EI, OI, CI or XI. */
14752 int
14753 arm_attr_length_move_neon (rtx insn)
14754 {
14755 rtx reg, mem, addr;
14756 int load;
14757 enum machine_mode mode;
14758
14759 extract_insn_cached (insn);
14760
14761 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
14762 {
14763 mode = GET_MODE (recog_data.operand[0]);
14764 switch (mode)
14765 {
14766 case EImode:
14767 case OImode:
14768 return 8;
14769 case CImode:
14770 return 12;
14771 case XImode:
14772 return 16;
14773 default:
14774 gcc_unreachable ();
14775 }
14776 }
14777
14778 load = REG_P (recog_data.operand[0]);
14779 reg = recog_data.operand[!load];
14780 mem = recog_data.operand[load];
14781
14782 gcc_assert (MEM_P (mem));
14783
14784 mode = GET_MODE (reg);
14785 addr = XEXP (mem, 0);
14786
14787 /* Strip off const from addresses like (const (plus (...))). */
14788 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14789 addr = XEXP (addr, 0);
14790
14791 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
14792 {
14793 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14794 return insns * 4;
14795 }
14796 else
14797 return 4;
14798 }
14799
14800 /* Return nonzero if the offset in the address is an immediate. Otherwise,
14801 return zero. */
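/* For instance, addresses of the form [r3] or [r3, #8] satisfy this test,
   while [r3, r4] and pre/post-indexed forms do not (register numbers are
   only illustrative).  */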
14802
14803 int
14804 arm_address_offset_is_imm (rtx insn)
14805 {
14806 rtx mem, addr;
14807
14808 extract_insn_cached (insn);
14809
14810 if (REG_P (recog_data.operand[0]))
14811 return 0;
14812
14813 mem = recog_data.operand[0];
14814
14815 gcc_assert (MEM_P (mem));
14816
14817 addr = XEXP (mem, 0);
14818
14819 if (REG_P (addr)
14820 || (GET_CODE (addr) == PLUS
14821 && REG_P (XEXP (addr, 0))
14822 && CONST_INT_P (XEXP (addr, 1))))
14823 return 1;
14824 else
14825 return 0;
14826 }
14827
14828 /* Output an ADD r, s, #n where n may be too big for one instruction.
14829 If the constant is zero and the source and destination registers are the same, output nothing. */
14830 const char *
14831 output_add_immediate (rtx *operands)
14832 {
14833 HOST_WIDE_INT n = INTVAL (operands[2]);
14834
14835 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
14836 {
14837 if (n < 0)
14838 output_multi_immediate (operands,
14839 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
14840 -n);
14841 else
14842 output_multi_immediate (operands,
14843 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
14844 n);
14845 }
14846
14847 return "";
14848 }
14849
14850 /* Output a multiple immediate operation.
14851 OPERANDS is the vector of operands referred to in the output patterns.
14852 INSTR1 is the output pattern to use for the first constant.
14853 INSTR2 is the output pattern to use for subsequent constants.
14854 IMMED_OP is the index of the constant slot in OPERANDS.
14855 N is the constant value. */
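/* A worked example (illustrative only, using r0/r1 and the "add" templates
   passed in by output_add_immediate): N = 0x010000ff is emitted as the two
   ARM-encodable chunks 0xff and 0x01000000, i.e.
       add r0, r1, #255
       add r0, r0, #16777216
   Each chunk is an 8-bit value at an even bit position, which is exactly
   what the ARM immediate encoding can represent.  */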
14856 static const char *
14857 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
14858 int immed_op, HOST_WIDE_INT n)
14859 {
14860 #if HOST_BITS_PER_WIDE_INT > 32
14861 n &= 0xffffffff;
14862 #endif
14863
14864 if (n == 0)
14865 {
14866 /* Quick and easy output. */
14867 operands[immed_op] = const0_rtx;
14868 output_asm_insn (instr1, operands);
14869 }
14870 else
14871 {
14872 int i;
14873 const char * instr = instr1;
14874
14875 /* Note that n is never zero here (which would give no output). */
14876 for (i = 0; i < 32; i += 2)
14877 {
14878 if (n & (3 << i))
14879 {
14880 operands[immed_op] = GEN_INT (n & (255 << i));
14881 output_asm_insn (instr, operands);
14882 instr = instr2;
14883 i += 6;
14884 }
14885 }
14886 }
14887
14888 return "";
14889 }
14890
14891 /* Return the name of a shifter operation. */
14892 static const char *
14893 arm_shift_nmem (enum rtx_code code)
14894 {
14895 switch (code)
14896 {
14897 case ASHIFT:
14898 return ARM_LSL_NAME;
14899
14900 case ASHIFTRT:
14901 return "asr";
14902
14903 case LSHIFTRT:
14904 return "lsr";
14905
14906 case ROTATERT:
14907 return "ror";
14908
14909 default:
14910 gcc_unreachable ();
14911 }
14912 }
14913
14914 /* Return the appropriate ARM instruction for the operation code.
14915 The returned result should not be overwritten. OP is the rtx of the
14916 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
14917 was shifted. */
14918 const char *
14919 arithmetic_instr (rtx op, int shift_first_arg)
14920 {
14921 switch (GET_CODE (op))
14922 {
14923 case PLUS:
14924 return "add";
14925
14926 case MINUS:
14927 return shift_first_arg ? "rsb" : "sub";
14928
14929 case IOR:
14930 return "orr";
14931
14932 case XOR:
14933 return "eor";
14934
14935 case AND:
14936 return "and";
14937
14938 case ASHIFT:
14939 case ASHIFTRT:
14940 case LSHIFTRT:
14941 case ROTATERT:
14942 return arm_shift_nmem (GET_CODE (op));
14943
14944 default:
14945 gcc_unreachable ();
14946 }
14947 }
14948
14949 /* Ensure valid constant shifts and return the appropriate shift mnemonic
14950 for the operation code. The returned result should not be overwritten.
14951 OP is the rtx code of the shift.
14952 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
14953 constant shift amount otherwise. */
14954 static const char *
14955 shift_op (rtx op, HOST_WIDE_INT *amountp)
14956 {
14957 const char * mnem;
14958 enum rtx_code code = GET_CODE (op);
14959
14960 switch (GET_CODE (XEXP (op, 1)))
14961 {
14962 case REG:
14963 case SUBREG:
14964 *amountp = -1;
14965 break;
14966
14967 case CONST_INT:
14968 *amountp = INTVAL (XEXP (op, 1));
14969 break;
14970
14971 default:
14972 gcc_unreachable ();
14973 }
14974
14975 switch (code)
14976 {
14977 case ROTATE:
14978 gcc_assert (*amountp != -1);
14979 *amountp = 32 - *amountp;
14980 code = ROTATERT;
14981
14982 /* Fall through. */
14983
14984 case ASHIFT:
14985 case ASHIFTRT:
14986 case LSHIFTRT:
14987 case ROTATERT:
14988 mnem = arm_shift_nmem (code);
14989 break;
14990
14991 case MULT:
14992 /* We never have to worry about the amount being other than a
14993 power of 2, since this case can never be reloaded from a reg. */
14994 gcc_assert (*amountp != -1);
14995 *amountp = int_log2 (*amountp);
14996 return ARM_LSL_NAME;
14997
14998 default:
14999 gcc_unreachable ();
15000 }
15001
15002 if (*amountp != -1)
15003 {
15004 /* This is not 100% correct, but follows from the desire to merge
15005 multiplication by a power of 2 with the recognizer for a
15006 shift. >=32 is not a valid shift for "lsl", so we must try and
15007 output a shift that produces the correct arithmetical result.
15008 Using lsr #32 is identical except for the fact that the carry bit
15009 is not set correctly if we set the flags; but we never use the
15010 carry bit from such an operation, so we can ignore that. */
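/* For instance, an lsl by 33 (which always yields zero on a 32-bit value)
   is emitted below as "lsr #32", which also yields zero.  */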
15011 if (code == ROTATERT)
15012 /* Rotate is just modulo 32. */
15013 *amountp &= 31;
15014 else if (*amountp != (*amountp & 31))
15015 {
15016 if (code == ASHIFT)
15017 mnem = "lsr";
15018 *amountp = 32;
15019 }
15020
15021 /* Shifts of 0 are no-ops. */
15022 if (*amountp == 0)
15023 return NULL;
15024 }
15025
15026 return mnem;
15027 }
15028
15029 /* Return the base-2 logarithm of POWER, which must be a power of two. */
15030
15031 static HOST_WIDE_INT
15032 int_log2 (HOST_WIDE_INT power)
15033 {
15034 HOST_WIDE_INT shift = 0;
15035
15036 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
15037 {
15038 gcc_assert (shift <= 31);
15039 shift++;
15040 }
15041
15042 return shift;
15043 }
15044
15045 /* Output a .ascii pseudo-op, keeping track of lengths. This is
15046 because /bin/as is horribly restrictive. The judgement about
15047 whether or not each character is 'printable' (and can be output as
15048 is) or not (and must be printed with an octal escape) must be made
15049 with reference to the *host* character set -- the situation is
15050 similar to that discussed in the comments above pp_c_char in
15051 c-pretty-print.c. */
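/* For example (illustrative), the four input bytes 'a', '"', 'b', '\n'
   would be emitted as:  .ascii "a\"b\012"  */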
15052
15053 #define MAX_ASCII_LEN 51
15054
15055 void
15056 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
15057 {
15058 int i;
15059 int len_so_far = 0;
15060
15061 fputs ("\t.ascii\t\"", stream);
15062
15063 for (i = 0; i < len; i++)
15064 {
15065 int c = p[i];
15066
15067 if (len_so_far >= MAX_ASCII_LEN)
15068 {
15069 fputs ("\"\n\t.ascii\t\"", stream);
15070 len_so_far = 0;
15071 }
15072
15073 if (ISPRINT (c))
15074 {
15075 if (c == '\\' || c == '\"')
15076 {
15077 putc ('\\', stream);
15078 len_so_far++;
15079 }
15080 putc (c, stream);
15081 len_so_far++;
15082 }
15083 else
15084 {
15085 fprintf (stream, "\\%03o", c);
15086 len_so_far += 4;
15087 }
15088 }
15089
15090 fputs ("\"\n", stream);
15091 }
15092 \f
15093 /* Compute the register save mask for registers 0 through 12
15094 inclusive. This code is used by arm_compute_save_reg_mask. */
15095
15096 static unsigned long
15097 arm_compute_save_reg0_reg12_mask (void)
15098 {
15099 unsigned long func_type = arm_current_func_type ();
15100 unsigned long save_reg_mask = 0;
15101 unsigned int reg;
15102
15103 if (IS_INTERRUPT (func_type))
15104 {
15105 unsigned int max_reg;
15106 /* Interrupt functions must not corrupt any registers,
15107 even call clobbered ones. If this is a leaf function
15108 we can just examine the registers used by the RTL, but
15109 otherwise we have to assume that whatever function is
15110 called might clobber anything, and so we have to save
15111 all the call-clobbered registers as well. */
15112 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
15113 /* FIQ handlers have registers r8 - r12 banked, so
15114 we only need to check r0 - r7.  Normal ISRs only
15115 bank r14 and r15, so we must check up to r12.
15116 r13 is the stack pointer which is always preserved,
15117 so we do not need to consider it here. */
15118 max_reg = 7;
15119 else
15120 max_reg = 12;
15121
15122 for (reg = 0; reg <= max_reg; reg++)
15123 if (df_regs_ever_live_p (reg)
15124 || (! crtl->is_leaf && call_used_regs[reg]))
15125 save_reg_mask |= (1 << reg);
15126
15127 /* Also save the pic base register if necessary. */
15128 if (flag_pic
15129 && !TARGET_SINGLE_PIC_BASE
15130 && arm_pic_register != INVALID_REGNUM
15131 && crtl->uses_pic_offset_table)
15132 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15133 }
15134 else if (IS_VOLATILE(func_type))
15135 {
15136 /* For noreturn functions we historically omitted register saves
15137 altogether. However this really messes up debugging. As a
15138 compromise save just the frame pointers. Combined with the link
15139 register saved elsewhere this should be sufficient to get
15140 a backtrace. */
15141 if (frame_pointer_needed)
15142 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15143 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
15144 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15145 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
15146 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
15147 }
15148 else
15149 {
15150 /* In the normal case we only need to save those registers
15151 which are call saved and which are used by this function. */
15152 for (reg = 0; reg <= 11; reg++)
15153 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15154 save_reg_mask |= (1 << reg);
15155
15156 /* Handle the frame pointer as a special case. */
15157 if (frame_pointer_needed)
15158 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15159
15160 /* If we aren't loading the PIC register,
15161 don't stack it even though it may be live. */
15162 if (flag_pic
15163 && !TARGET_SINGLE_PIC_BASE
15164 && arm_pic_register != INVALID_REGNUM
15165 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
15166 || crtl->uses_pic_offset_table))
15167 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15168
15169 /* The prologue will copy SP into R0, so save it. */
15170 if (IS_STACKALIGN (func_type))
15171 save_reg_mask |= 1;
15172 }
15173
15174 /* Save registers so the exception handler can modify them. */
15175 if (crtl->calls_eh_return)
15176 {
15177 unsigned int i;
15178
15179 for (i = 0; ; i++)
15180 {
15181 reg = EH_RETURN_DATA_REGNO (i);
15182 if (reg == INVALID_REGNUM)
15183 break;
15184 save_reg_mask |= 1 << reg;
15185 }
15186 }
15187
15188 return save_reg_mask;
15189 }
15190
15191
15192 /* Compute the number of bytes used to store the static chain register on the
15193 stack, above the stack frame. We need to know this accurately to get the
15194 alignment of the rest of the stack frame correct. */
15195
15196 static int arm_compute_static_chain_stack_bytes (void)
15197 {
15198 unsigned long func_type = arm_current_func_type ();
15199 int static_chain_stack_bytes = 0;
15200
15201 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
15202 && IS_NESTED (func_type)
15203 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
15204 static_chain_stack_bytes = 4;
15205
15206 return static_chain_stack_bytes;
15207 }
15208
15209
15210 /* Compute a bit mask of which registers need to be
15211 saved on the stack for the current function.
15212 This is used by arm_get_frame_offsets, which may add extra registers. */
15213
15214 static unsigned long
15215 arm_compute_save_reg_mask (void)
15216 {
15217 unsigned int save_reg_mask = 0;
15218 unsigned long func_type = arm_current_func_type ();
15219 unsigned int reg;
15220
15221 if (IS_NAKED (func_type))
15222 /* This should never really happen. */
15223 return 0;
15224
15225 /* If we are creating a stack frame, then we must save the frame pointer,
15226 IP (which will hold the old stack pointer), LR and the PC. */
15227 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15228 save_reg_mask |=
15229 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
15230 | (1 << IP_REGNUM)
15231 | (1 << LR_REGNUM)
15232 | (1 << PC_REGNUM);
15233
15234 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
15235
15236 /* Decide if we need to save the link register.
15237 Interrupt routines have their own banked link register,
15238 so they never need to save it.
15239 Otherwise if we do not use the link register we do not need to save
15240 it. If we are pushing other registers onto the stack however, we
15241 can save an instruction in the epilogue by pushing the link register
15242 now and then popping it back into the PC. This incurs extra memory
15243 accesses though, so we only do it when optimizing for size, and only
15244 if we know that we will not need a fancy return sequence. */
15245 if (df_regs_ever_live_p (LR_REGNUM)
15246 || (save_reg_mask
15247 && optimize_size
15248 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15249 && !crtl->calls_eh_return))
15250 save_reg_mask |= 1 << LR_REGNUM;
15251
15252 if (cfun->machine->lr_save_eliminated)
15253 save_reg_mask &= ~ (1 << LR_REGNUM);
15254
15255 if (TARGET_REALLY_IWMMXT
15256 && ((bit_count (save_reg_mask)
15257 + ARM_NUM_INTS (crtl->args.pretend_args_size +
15258 arm_compute_static_chain_stack_bytes())
15259 ) % 2) != 0)
15260 {
15261 /* The total number of registers that are going to be pushed
15262 onto the stack is odd. We need to ensure that the stack
15263 is 64-bit aligned before we start to save iWMMXt registers,
15264 and also before we start to create locals. (A local variable
15265 might be a double or long long which we will load/store using
15266 an iWMMXt instruction). Therefore we need to push another
15267 ARM register, so that the stack will be 64-bit aligned. We
15268 try to avoid using the arg registers (r0 -r3) as they might be
15269 used to pass values in a tail call. */
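/* Illustrative example: if only r4, r5 and lr were to be pushed with no
   pretend args, three words would be stored and the stack would lose its
   64-bit alignment, so the loop below would add r6 to the mask.  */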
15270 for (reg = 4; reg <= 12; reg++)
15271 if ((save_reg_mask & (1 << reg)) == 0)
15272 break;
15273
15274 if (reg <= 12)
15275 save_reg_mask |= (1 << reg);
15276 else
15277 {
15278 cfun->machine->sibcall_blocked = 1;
15279 save_reg_mask |= (1 << 3);
15280 }
15281 }
15282
15283 /* We may need to push an additional register for use initializing the
15284 PIC base register. */
15285 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
15286 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
15287 {
15288 reg = thumb_find_work_register (1 << 4);
15289 if (!call_used_regs[reg])
15290 save_reg_mask |= (1 << reg);
15291 }
15292
15293 return save_reg_mask;
15294 }
15295
15296
15297 /* Compute a bit mask of which registers need to be
15298 saved on the stack for the current function. */
15299 static unsigned long
15300 thumb1_compute_save_reg_mask (void)
15301 {
15302 unsigned long mask;
15303 unsigned reg;
15304
15305 mask = 0;
15306 for (reg = 0; reg < 12; reg ++)
15307 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15308 mask |= 1 << reg;
15309
15310 if (flag_pic
15311 && !TARGET_SINGLE_PIC_BASE
15312 && arm_pic_register != INVALID_REGNUM
15313 && crtl->uses_pic_offset_table)
15314 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15315
15316 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
15317 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
15318 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15319
15320 /* LR will also be pushed if any lo regs are pushed. */
15321 if (mask & 0xff || thumb_force_lr_save ())
15322 mask |= (1 << LR_REGNUM);
15323
15324 /* Make sure we have a low work register if we need one.
15325 We will need one if we are going to push a high register,
15326 but we are not currently intending to push a low register. */
15327 if ((mask & 0xff) == 0
15328 && ((mask & 0x0f00) || TARGET_BACKTRACE))
15329 {
15330 /* Use thumb_find_work_register to choose which register
15331 we will use. If the register is live then we will
15332 have to push it. Use LAST_LO_REGNUM as our fallback
15333 choice for the register to select. */
15334 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
15335 /* Make sure the register returned by thumb_find_work_register is
15336 not part of the return value. */
15337 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
15338 reg = LAST_LO_REGNUM;
15339
15340 if (! call_used_regs[reg])
15341 mask |= 1 << reg;
15342 }
15343
15344 /* The 504 below is 8 bytes less than 512 because there are two possible
15345 alignment words. We can't tell here if they will be present or not so we
15346 have to play it safe and assume that they are. */
15347 if ((CALLER_INTERWORKING_SLOT_SIZE +
15348 ROUND_UP_WORD (get_frame_size ()) +
15349 crtl->outgoing_args_size) >= 504)
15350 {
15351 /* This is the same as the code in thumb1_expand_prologue() which
15352 determines which register to use for stack decrement. */
15353 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
15354 if (mask & (1 << reg))
15355 break;
15356
15357 if (reg > LAST_LO_REGNUM)
15358 {
15359 /* Make sure we have a register available for stack decrement. */
15360 mask |= 1 << LAST_LO_REGNUM;
15361 }
15362 }
15363
15364 return mask;
15365 }
15366
15367
15368 /* Return the number of bytes required to save VFP registers. */
15369 static int
15370 arm_get_vfp_saved_size (void)
15371 {
15372 unsigned int regno;
15373 int count;
15374 int saved;
15375
15376 saved = 0;
15377 /* Space for saved VFP registers. */
15378 if (TARGET_HARD_FLOAT && TARGET_VFP)
15379 {
15380 count = 0;
15381 for (regno = FIRST_VFP_REGNUM;
15382 regno < LAST_VFP_REGNUM;
15383 regno += 2)
15384 {
15385 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
15386 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
15387 {
15388 if (count > 0)
15389 {
15390 /* Workaround ARM10 VFPr1 bug. */
15391 if (count == 2 && !arm_arch6)
15392 count++;
15393 saved += count * 8;
15394 }
15395 count = 0;
15396 }
15397 else
15398 count++;
15399 }
15400 if (count > 0)
15401 {
15402 if (count == 2 && !arm_arch6)
15403 count++;
15404 saved += count * 8;
15405 }
15406 }
15407 return saved;
15408 }
15409
15410
15411 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
15412 everything bar the final return instruction.  If SIMPLE_RETURN is true,
15413 then do not output the epilogue, because it has already been emitted in RTL. */
15414 const char *
15415 output_return_instruction (rtx operand, bool really_return, bool reverse,
15416 bool simple_return)
15417 {
15418 char conditional[10];
15419 char instr[100];
15420 unsigned reg;
15421 unsigned long live_regs_mask;
15422 unsigned long func_type;
15423 arm_stack_offsets *offsets;
15424
15425 func_type = arm_current_func_type ();
15426
15427 if (IS_NAKED (func_type))
15428 return "";
15429
15430 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15431 {
15432 /* If this function was declared non-returning, and we have
15433 found a tail call, then we have to trust that the called
15434 function won't return. */
15435 if (really_return)
15436 {
15437 rtx ops[2];
15438
15439 /* Otherwise, trap an attempted return by aborting. */
15440 ops[0] = operand;
15441 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
15442 : "abort");
15443 assemble_external_libcall (ops[1]);
15444 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
15445 }
15446
15447 return "";
15448 }
15449
15450 gcc_assert (!cfun->calls_alloca || really_return);
15451
15452 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
15453
15454 cfun->machine->return_used_this_function = 1;
15455
15456 offsets = arm_get_frame_offsets ();
15457 live_regs_mask = offsets->saved_regs_mask;
15458
15459 if (!simple_return && live_regs_mask)
15460 {
15461 const char * return_reg;
15462
15463 /* If we do not have any special requirements for function exit
15464 (e.g. interworking) then we can load the return address
15465 directly into the PC. Otherwise we must load it into LR. */
15466 if (really_return
15467 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
15468 return_reg = reg_names[PC_REGNUM];
15469 else
15470 return_reg = reg_names[LR_REGNUM];
15471
15472 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
15473 {
15474 /* There are three possible reasons for the IP register
15475 being saved. 1) a stack frame was created, in which case
15476 IP contains the old stack pointer, or 2) an ISR routine
15477 corrupted it, or 3) it was saved to align the stack on
15478 iWMMXt. In case 1, restore IP into SP, otherwise just
15479 restore IP. */
15480 if (frame_pointer_needed)
15481 {
15482 live_regs_mask &= ~ (1 << IP_REGNUM);
15483 live_regs_mask |= (1 << SP_REGNUM);
15484 }
15485 else
15486 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
15487 }
15488
15489 /* On some ARM architectures it is faster to use LDR rather than
15490 LDM to load a single register. On other architectures, the
15491 cost is the same. In 26 bit mode, or for exception handlers,
15492 we have to use LDM to load the PC so that the CPSR is also
15493 restored. */
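/* Illustrative example: a function whose only saved register is r4
   restores it with "ldr r4, [sp], #4" via the single-register case below,
   rather than with a one-register ldm.  */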
15494 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15495 if (live_regs_mask == (1U << reg))
15496 break;
15497
15498 if (reg <= LAST_ARM_REGNUM
15499 && (reg != LR_REGNUM
15500 || ! really_return
15501 || ! IS_INTERRUPT (func_type)))
15502 {
15503 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
15504 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
15505 }
15506 else
15507 {
15508 char *p;
15509 int first = 1;
15510
15511 /* Generate the load multiple instruction to restore the
15512 registers. Note we can get here, even if
15513 frame_pointer_needed is true, but only if sp already
15514 points to the base of the saved core registers. */
15515 if (live_regs_mask & (1 << SP_REGNUM))
15516 {
15517 unsigned HOST_WIDE_INT stack_adjust;
15518
15519 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
15520 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
15521
15522 if (stack_adjust && arm_arch5 && TARGET_ARM)
15523 if (TARGET_UNIFIED_ASM)
15524 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
15525 else
15526 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
15527 else
15528 {
15529 /* If we can't use ldmib (SA110 bug),
15530 then try to pop r3 instead. */
15531 if (stack_adjust)
15532 live_regs_mask |= 1 << 3;
15533
15534 if (TARGET_UNIFIED_ASM)
15535 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
15536 else
15537 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
15538 }
15539 }
15540 else
15541 if (TARGET_UNIFIED_ASM)
15542 sprintf (instr, "pop%s\t{", conditional);
15543 else
15544 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
15545
15546 p = instr + strlen (instr);
15547
15548 for (reg = 0; reg <= SP_REGNUM; reg++)
15549 if (live_regs_mask & (1 << reg))
15550 {
15551 int l = strlen (reg_names[reg]);
15552
15553 if (first)
15554 first = 0;
15555 else
15556 {
15557 memcpy (p, ", ", 2);
15558 p += 2;
15559 }
15560
15561 memcpy (p, "%|", 2);
15562 memcpy (p + 2, reg_names[reg], l);
15563 p += l + 2;
15564 }
15565
15566 if (live_regs_mask & (1 << LR_REGNUM))
15567 {
15568 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
15569 /* If returning from an interrupt, restore the CPSR. */
15570 if (IS_INTERRUPT (func_type))
15571 strcat (p, "^");
15572 }
15573 else
15574 strcpy (p, "}");
15575 }
15576
15577 output_asm_insn (instr, & operand);
15578
15579 /* See if we need to generate an extra instruction to
15580 perform the actual function return. */
15581 if (really_return
15582 && func_type != ARM_FT_INTERWORKED
15583 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
15584 {
15585 /* The return has already been handled
15586 by loading the LR into the PC. */
15587 return "";
15588 }
15589 }
15590
15591 if (really_return)
15592 {
15593 switch ((int) ARM_FUNC_TYPE (func_type))
15594 {
15595 case ARM_FT_ISR:
15596 case ARM_FT_FIQ:
15597 /* ??? This is wrong for unified assembly syntax. */
15598 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
15599 break;
15600
15601 case ARM_FT_INTERWORKED:
15602 sprintf (instr, "bx%s\t%%|lr", conditional);
15603 break;
15604
15605 case ARM_FT_EXCEPTION:
15606 /* ??? This is wrong for unified assembly syntax. */
15607 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
15608 break;
15609
15610 default:
15611 /* Use bx if it's available. */
15612 if (arm_arch5 || arm_arch4t)
15613 sprintf (instr, "bx%s\t%%|lr", conditional);
15614 else
15615 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
15616 break;
15617 }
15618
15619 output_asm_insn (instr, & operand);
15620 }
15621
15622 return "";
15623 }
15624
15625 /* Write the function name into the code section, directly preceding
15626 the function prologue.
15627
15628 Code will be output similar to this:
15629 t0
15630 .ascii "arm_poke_function_name", 0
15631 .align
15632 t1
15633 .word 0xff000000 + (t1 - t0)
15634 arm_poke_function_name
15635 mov ip, sp
15636 stmfd sp!, {fp, ip, lr, pc}
15637 sub fp, ip, #4
15638
15639 When performing a stack backtrace, code can inspect the value
15640 of 'pc' stored at 'fp' + 0. If the trace function then looks
15641 at location pc - 12 and the top 8 bits are set, then we know
15642 that there is a function name embedded immediately preceding this
15643 location, whose length is ((pc[-3]) & ~0xff000000).
15644
15645 We assume that pc is declared as a pointer to an unsigned long.
15646
15647 It is of no benefit to output the function name if we are assembling
15648 a leaf function. These function types will not contain a stack
15649 backtrace structure, therefore it is not possible to determine the
15650 function name. */
15651 void
15652 arm_poke_function_name (FILE *stream, const char *name)
15653 {
15654 unsigned long alignlength;
15655 unsigned long length;
15656 rtx x;
15657
15658 length = strlen (name) + 1;
15659 alignlength = ROUND_UP_WORD (length);
15660
15661 ASM_OUTPUT_ASCII (stream, name, length);
15662 ASM_OUTPUT_ALIGN (stream, 2);
15663 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
15664 assemble_aligned_integer (UNITS_PER_WORD, x);
15665 }
15666
15667 /* Place some comments into the assembler stream
15668 describing the current function. */
15669 static void
15670 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
15671 {
15672 unsigned long func_type;
15673
15674 /* ??? Do we want to print some of the below anyway? */
15675 if (TARGET_THUMB1)
15676 return;
15677
15678 /* Sanity check. */
15679 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
15680
15681 func_type = arm_current_func_type ();
15682
15683 switch ((int) ARM_FUNC_TYPE (func_type))
15684 {
15685 default:
15686 case ARM_FT_NORMAL:
15687 break;
15688 case ARM_FT_INTERWORKED:
15689 asm_fprintf (f, "\t%@ Function supports interworking.\n");
15690 break;
15691 case ARM_FT_ISR:
15692 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
15693 break;
15694 case ARM_FT_FIQ:
15695 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
15696 break;
15697 case ARM_FT_EXCEPTION:
15698 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
15699 break;
15700 }
15701
15702 if (IS_NAKED (func_type))
15703 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
15704
15705 if (IS_VOLATILE (func_type))
15706 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
15707
15708 if (IS_NESTED (func_type))
15709 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
15710 if (IS_STACKALIGN (func_type))
15711 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
15712
15713 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
15714 crtl->args.size,
15715 crtl->args.pretend_args_size, frame_size);
15716
15717 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
15718 frame_pointer_needed,
15719 cfun->machine->uses_anonymous_args);
15720
15721 if (cfun->machine->lr_save_eliminated)
15722 asm_fprintf (f, "\t%@ link register save eliminated.\n");
15723
15724 if (crtl->calls_eh_return)
15725 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
15726
15727 }
15728
15729 static void
15730 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
15731 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
15732 {
15733 arm_stack_offsets *offsets;
15734
15735 if (TARGET_THUMB1)
15736 {
15737 int regno;
15738
15739 /* Emit any call-via-reg trampolines that are needed for v4t support
15740 of call_reg and call_value_reg type insns. */
15741 for (regno = 0; regno < LR_REGNUM; regno++)
15742 {
15743 rtx label = cfun->machine->call_via[regno];
15744
15745 if (label != NULL)
15746 {
15747 switch_to_section (function_section (current_function_decl));
15748 targetm.asm_out.internal_label (asm_out_file, "L",
15749 CODE_LABEL_NUMBER (label));
15750 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
15751 }
15752 }
15753
15754 /* ??? Probably not safe to set this here, since it assumes that a
15755 function will be emitted as assembly immediately after we generate
15756 RTL for it. This does not happen for inline functions. */
15757 cfun->machine->return_used_this_function = 0;
15758 }
15759 else /* TARGET_32BIT */
15760 {
15761 /* We need to take into account any stack-frame rounding. */
15762 offsets = arm_get_frame_offsets ();
15763
15764 gcc_assert (!use_return_insn (FALSE, NULL)
15765 || (cfun->machine->return_used_this_function != 0)
15766 || offsets->saved_regs == offsets->outgoing_args
15767 || frame_pointer_needed);
15768
15769 /* Reset the ARM-specific per-function variables. */
15770 after_arm_reorg = 0;
15771 }
15772 }
15773
15774 /* Generate and emit an insn that we will recognize as a push_multi.
15775 Unfortunately, since this insn does not reflect very well the actual
15776 semantics of the operation, we need to annotate the insn for the benefit
15777 of DWARF2 frame unwind information. */
15778 static rtx
15779 emit_multi_reg_push (unsigned long mask)
15780 {
15781 int num_regs = 0;
15782 int num_dwarf_regs;
15783 int i, j;
15784 rtx par;
15785 rtx dwarf;
15786 int dwarf_par_index;
15787 rtx tmp, reg;
15788
15789 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15790 if (mask & (1 << i))
15791 num_regs++;
15792
15793 gcc_assert (num_regs && num_regs <= 16);
15794
15795 /* We don't record the PC in the dwarf frame information. */
15796 num_dwarf_regs = num_regs;
15797 if (mask & (1 << PC_REGNUM))
15798 num_dwarf_regs--;
15799
15800 /* For the body of the insn we are going to generate an UNSPEC in
15801 parallel with several USEs. This allows the insn to be recognized
15802 by the push_multi pattern in the arm.md file.
15803
15804 The body of the insn looks something like this:
15805
15806 (parallel [
15807 (set (mem:BLK (pre_modify:SI (reg:SI sp)
15808 (const_int:SI <num>)))
15809 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
15810 (use (reg:SI XX))
15811 (use (reg:SI YY))
15812 ...
15813 ])
15814
15815 For the frame note however, we try to be more explicit and actually
15816 show each register being stored into the stack frame, plus a (single)
15817 decrement of the stack pointer. We do it this way in order to be
15818 friendly to the stack unwinding code, which only wants to see a single
15819 stack decrement per instruction. The RTL we generate for the note looks
15820 something like this:
15821
15822 (sequence [
15823 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
15824 (set (mem:SI (reg:SI sp)) (reg:SI r4))
15825 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
15826 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
15827 ...
15828 ])
15829
15830 FIXME: In an ideal world the PRE_MODIFY would not exist and
15831 instead we'd have a parallel expression detailing all
15832 the stores to the various memory addresses so that debug
15833 information is more up-to-date. Remember however while writing
15834 this to take care of the constraints with the push instruction.
15835
15836 Note also that this has to be taken care of for the VFP registers.
15837
15838 For more see PR43399. */
15839
15840 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
15841 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
15842 dwarf_par_index = 1;
15843
15844 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15845 {
15846 if (mask & (1 << i))
15847 {
15848 reg = gen_rtx_REG (SImode, i);
15849
15850 XVECEXP (par, 0, 0)
15851 = gen_rtx_SET (VOIDmode,
15852 gen_frame_mem
15853 (BLKmode,
15854 gen_rtx_PRE_MODIFY (Pmode,
15855 stack_pointer_rtx,
15856 plus_constant
15857 (Pmode, stack_pointer_rtx,
15858 -4 * num_regs))
15859 ),
15860 gen_rtx_UNSPEC (BLKmode,
15861 gen_rtvec (1, reg),
15862 UNSPEC_PUSH_MULT));
15863
15864 if (i != PC_REGNUM)
15865 {
15866 tmp = gen_rtx_SET (VOIDmode,
15867 gen_frame_mem (SImode, stack_pointer_rtx),
15868 reg);
15869 RTX_FRAME_RELATED_P (tmp) = 1;
15870 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
15871 dwarf_par_index++;
15872 }
15873
15874 break;
15875 }
15876 }
15877
15878 for (j = 1, i++; j < num_regs; i++)
15879 {
15880 if (mask & (1 << i))
15881 {
15882 reg = gen_rtx_REG (SImode, i);
15883
15884 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15885
15886 if (i != PC_REGNUM)
15887 {
15888 tmp
15889 = gen_rtx_SET (VOIDmode,
15890 gen_frame_mem
15891 (SImode,
15892 plus_constant (Pmode, stack_pointer_rtx,
15893 4 * j)),
15894 reg);
15895 RTX_FRAME_RELATED_P (tmp) = 1;
15896 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15897 }
15898
15899 j++;
15900 }
15901 }
15902
15903 par = emit_insn (par);
15904
15905 tmp = gen_rtx_SET (VOIDmode,
15906 stack_pointer_rtx,
15907 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
15908 RTX_FRAME_RELATED_P (tmp) = 1;
15909 XVECEXP (dwarf, 0, 0) = tmp;
15910
15911 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15912
15913 return par;
15914 }
15915
15916 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
15917 SAVED_REGS_MASK shows which registers need to be restored.
15918
15919 Unfortunately, since this insn does not reflect very well the actual
15920 semantics of the operation, we need to annotate the insn for the benefit
15921 of DWARF2 frame unwind information. */
15922 static void
15923 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
15924 {
15925 int num_regs = 0;
15926 int i, j;
15927 rtx par;
15928 rtx dwarf = NULL_RTX;
15929 rtx tmp, reg;
15930 bool return_in_pc;
15931 int offset_adj;
15932 int emit_update;
15933
15934 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
15935 offset_adj = return_in_pc ? 1 : 0;
15936 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15937 if (saved_regs_mask & (1 << i))
15938 num_regs++;
15939
15940 gcc_assert (num_regs && num_regs <= 16);
15941
15942 /* If SP is in reglist, then we don't emit SP update insn. */
15943 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
15944
15945 /* The parallel needs to hold num_regs SETs
15946 and one SET for the stack update. */
15947 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
15948
15949 if (return_in_pc)
15950 {
15951 tmp = ret_rtx;
15952 XVECEXP (par, 0, 0) = tmp;
15953 }
15954
15955 if (emit_update)
15956 {
15957 /* Increment the stack pointer, based on there being
15958 num_regs 4-byte registers to restore. */
15959 tmp = gen_rtx_SET (VOIDmode,
15960 stack_pointer_rtx,
15961 plus_constant (Pmode,
15962 stack_pointer_rtx,
15963 4 * num_regs));
15964 RTX_FRAME_RELATED_P (tmp) = 1;
15965 XVECEXP (par, 0, offset_adj) = tmp;
15966 }
15967
15968 /* Now restore every reg, which may include PC. */
15969 for (j = 0, i = 0; j < num_regs; i++)
15970 if (saved_regs_mask & (1 << i))
15971 {
15972 reg = gen_rtx_REG (SImode, i);
15973 tmp = gen_rtx_SET (VOIDmode,
15974 reg,
15975 gen_frame_mem
15976 (SImode,
15977 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
15978 RTX_FRAME_RELATED_P (tmp) = 1;
15979 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
15980
15981 /* We need to maintain a sequence for DWARF info too. As dwarf info
15982 should not have PC, skip PC. */
15983 if (i != PC_REGNUM)
15984 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
15985
15986 j++;
15987 }
15988
15989 if (return_in_pc)
15990 par = emit_jump_insn (par);
15991 else
15992 par = emit_insn (par);
15993
15994 REG_NOTES (par) = dwarf;
15995 }
15996
15997 /* Generate and emit an insn pattern that we will recognize as a pop_multi
15998 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
15999
16000 Unfortunately, since this insn does not reflect very well the actual
16001 semantics of the operation, we need to annotate the insn for the benefit
16002 of DWARF2 frame unwind information. */
16003 static void
16004 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
16005 {
16006 int i, j;
16007 rtx par;
16008 rtx dwarf = NULL_RTX;
16009 rtx tmp, reg;
16010
16011 gcc_assert (num_regs && num_regs <= 32);
16012
16013 /* Workaround ARM10 VFPr1 bug. */
16014 if (num_regs == 2 && !arm_arch6)
16015 {
16016 if (first_reg == 15)
16017 first_reg--;
16018
16019 num_regs++;
16020 }
16021
16022 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
16023 there could be up to 32 D-registers to restore.
16024 If there are more than 16 D-registers, make two recursive calls,
16025 each of which emits one pop_multi instruction. */
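/* Illustrative example: with num_regs == 20 the first recursive call pops
   16 D registers and advances the base register by 128 bytes, and the
   second call pops the remaining four.  */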
16026 if (num_regs > 16)
16027 {
16028 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
16029 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
16030 return;
16031 }
16032
16033 /* The parallel needs to hold num_regs SETs
16034 and one SET for the stack update. */
16035 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
16036
16037 /* Increment the stack pointer, based on there being
16038 num_regs 8-byte registers to restore. */
16039 tmp = gen_rtx_SET (VOIDmode,
16040 base_reg,
16041 plus_constant (Pmode, base_reg, 8 * num_regs));
16042 RTX_FRAME_RELATED_P (tmp) = 1;
16043 XVECEXP (par, 0, 0) = tmp;
16044
16045 /* Now show every reg that will be restored, using a SET for each. */
16046 for (j = 0, i=first_reg; j < num_regs; i += 2)
16047 {
16048 reg = gen_rtx_REG (DFmode, i);
16049
16050 tmp = gen_rtx_SET (VOIDmode,
16051 reg,
16052 gen_frame_mem
16053 (DFmode,
16054 plus_constant (Pmode, base_reg, 8 * j)));
16055 RTX_FRAME_RELATED_P (tmp) = 1;
16056 XVECEXP (par, 0, j + 1) = tmp;
16057
16058 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
16059
16060 j++;
16061 }
16062
16063 par = emit_insn (par);
16064 REG_NOTES (par) = dwarf;
16065 }
16066
16067 /* Calculate the size of the return value that is passed in registers. */
16068 static unsigned
16069 arm_size_return_regs (void)
16070 {
16071 enum machine_mode mode;
16072
16073 if (crtl->return_rtx != 0)
16074 mode = GET_MODE (crtl->return_rtx);
16075 else
16076 mode = DECL_MODE (DECL_RESULT (current_function_decl));
16077
16078 return GET_MODE_SIZE (mode);
16079 }
16080
16081 /* Return true if the current function needs to save/restore LR. */
16082 static bool
16083 thumb_force_lr_save (void)
16084 {
16085 return !cfun->machine->lr_save_eliminated
16086 && (!leaf_function_p ()
16087 || thumb_far_jump_used_p ()
16088 || df_regs_ever_live_p (LR_REGNUM));
16089 }
16090
16091
16092 /* Return true if r3 is used by any of the tail call insns in the
16093 current function. */
16094 static bool
16095 any_sibcall_uses_r3 (void)
16096 {
16097 edge_iterator ei;
16098 edge e;
16099
16100 if (!crtl->tail_call_emit)
16101 return false;
16102 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
16103 if (e->flags & EDGE_SIBCALL)
16104 {
16105 rtx call = BB_END (e->src);
16106 if (!CALL_P (call))
16107 call = prev_nonnote_nondebug_insn (call);
16108 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
16109 if (find_regno_fusage (call, USE, 3))
16110 return true;
16111 }
16112 return false;
16113 }
16114
16115
16116 /* Compute the distance from register FROM to register TO.
16117 These can be the arg pointer (26), the soft frame pointer (25),
16118 the stack pointer (13) or the hard frame pointer (11).
16119 In thumb mode r7 is used as the soft frame pointer, if needed.
16120 Typical stack layout looks like this:
16121
16122 old stack pointer -> | |
16123 ----
16124 | | \
16125 | | saved arguments for
16126 | | vararg functions
16127 | | /
16128 --
16129 hard FP & arg pointer -> | | \
16130 | | stack
16131 | | frame
16132 | | /
16133 --
16134 | | \
16135 | | call saved
16136 | | registers
16137 soft frame pointer -> | | /
16138 --
16139 | | \
16140 | | local
16141 | | variables
16142 locals base pointer -> | | /
16143 --
16144 | | \
16145 | | outgoing
16146 | | arguments
16147 current stack pointer -> | | /
16148 --
16149
16150 For a given function some or all of these stack components
16151 may not be needed, giving rise to the possibility of
16152 eliminating some of the registers.
16153
16154 The values returned by this function must reflect the behavior
16155 of arm_expand_prologue() and arm_compute_save_reg_mask().
16156
16157 The sign of the number returned reflects the direction of stack
16158 growth, so the values are positive for all eliminations except
16159 from the soft frame pointer to the hard frame pointer.
16160
16161 SFP may point just inside the local variables block to ensure correct
16162 alignment. */
16163
16164
16165 /* Calculate stack offsets. These are used to calculate register elimination
16166 offsets and in prologue/epilogue code. Also calculates which registers
16167 should be saved. */
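/* A rough, purely illustrative example (exact values depend on the target
   flags and alignment requirements): with no pretend args, four core
   registers saved (16 bytes), 20 bytes of locals and 8 bytes of outgoing
   arguments, this would give saved_args = 0, saved_regs = 16,
   soft_frame = 16, locals_base = 36 and outgoing_args rounded up to 48 to
   keep SP doubleword aligned.  */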
16168
16169 static arm_stack_offsets *
16170 arm_get_frame_offsets (void)
16171 {
16172 struct arm_stack_offsets *offsets;
16173 unsigned long func_type;
16174 int leaf;
16175 int saved;
16176 int core_saved;
16177 HOST_WIDE_INT frame_size;
16178 int i;
16179
16180 offsets = &cfun->machine->stack_offsets;
16181
16182 /* We need to know if we are a leaf function. Unfortunately, it
16183 is possible to be called after start_sequence has been called,
16184 which causes get_insns to return the insns for the sequence,
16185 not the function, which will cause leaf_function_p to return
16186 the incorrect result.
16187
16188 Fortunately, we only need to know about leaf functions once reload has
16189 completed, and the frame size cannot be changed after that time, so we
16190 can safely return the cached offsets computed on an earlier call. */
16191
16192 if (reload_completed)
16193 return offsets;
16194
16195 /* Initially this is the size of the local variables.  It will be translated
16196 into an offset once we have determined the size of preceding data. */
16197 frame_size = ROUND_UP_WORD (get_frame_size ());
16198
16199 leaf = leaf_function_p ();
16200
16201 /* Space for variadic functions. */
16202 offsets->saved_args = crtl->args.pretend_args_size;
16203
16204 /* In Thumb mode this is incorrect, but never used. */
16205 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
16206 arm_compute_static_chain_stack_bytes();
16207
16208 if (TARGET_32BIT)
16209 {
16210 unsigned int regno;
16211
16212 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
16213 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16214 saved = core_saved;
16215
16216 /* We know that SP will be doubleword aligned on entry, and we must
16217 preserve that condition at any subroutine call. We also require the
16218 soft frame pointer to be doubleword aligned. */
16219
16220 if (TARGET_REALLY_IWMMXT)
16221 {
16222 /* Check for the call-saved iWMMXt registers. */
16223 for (regno = FIRST_IWMMXT_REGNUM;
16224 regno <= LAST_IWMMXT_REGNUM;
16225 regno++)
16226 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
16227 saved += 8;
16228 }
16229
16230 func_type = arm_current_func_type ();
16231 /* Space for saved VFP registers. */
16232 if (! IS_VOLATILE (func_type)
16233 && TARGET_HARD_FLOAT && TARGET_VFP)
16234 saved += arm_get_vfp_saved_size ();
16235 }
16236 else /* TARGET_THUMB1 */
16237 {
16238 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
16239 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16240 saved = core_saved;
16241 if (TARGET_BACKTRACE)
16242 saved += 16;
16243 }
16244
16245 /* Saved registers include the stack frame. */
16246 offsets->saved_regs = offsets->saved_args + saved +
16247 arm_compute_static_chain_stack_bytes();
16248 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
16249 /* A leaf function does not need any stack alignment if it has nothing
16250 on the stack. */
16251 if (leaf && frame_size == 0
16252 /* However if it calls alloca(), we have a dynamically allocated
16253 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
16254 && ! cfun->calls_alloca)
16255 {
16256 offsets->outgoing_args = offsets->soft_frame;
16257 offsets->locals_base = offsets->soft_frame;
16258 return offsets;
16259 }
16260
16261 /* Ensure SFP has the correct alignment. */
16262 if (ARM_DOUBLEWORD_ALIGN
16263 && (offsets->soft_frame & 7))
16264 {
16265 offsets->soft_frame += 4;
16266 /* Try to align stack by pushing an extra reg. Don't bother doing this
16267 when there is a stack frame as the alignment will be rolled into
16268 the normal stack adjustment. */
16269 if (frame_size + crtl->outgoing_args_size == 0)
16270 {
16271 int reg = -1;
16272
16273 /* If it is safe to use r3, then do so. This sometimes
16274 generates better code on Thumb-2 by avoiding the need to
16275 use 32-bit push/pop instructions. */
16276 if (! any_sibcall_uses_r3 ()
16277 && arm_size_return_regs () <= 12
16278 && (offsets->saved_regs_mask & (1 << 3)) == 0)
16279 {
16280 reg = 3;
16281 }
16282 else
16283 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
16284 {
16285 /* Avoid fixed registers; they may be changed at
16286 arbitrary times so it's unsafe to restore them
16287 during the epilogue. */
16288 if (!fixed_regs[i]
16289 && (offsets->saved_regs_mask & (1 << i)) == 0)
16290 {
16291 reg = i;
16292 break;
16293 }
16294 }
16295
16296 if (reg != -1)
16297 {
16298 offsets->saved_regs += 4;
16299 offsets->saved_regs_mask |= (1 << reg);
16300 }
16301 }
16302 }
16303
16304 offsets->locals_base = offsets->soft_frame + frame_size;
16305 offsets->outgoing_args = (offsets->locals_base
16306 + crtl->outgoing_args_size);
16307
16308 if (ARM_DOUBLEWORD_ALIGN)
16309 {
16310 /* Ensure SP remains doubleword aligned. */
16311 if (offsets->outgoing_args & 7)
16312 offsets->outgoing_args += 4;
16313 gcc_assert (!(offsets->outgoing_args & 7));
16314 }
16315
16316 return offsets;
16317 }
16318
16319
16320 /* Calculate the relative offsets for the different stack pointers. Positive
16321 offsets are in the direction of stack growth. */
16322
16323 HOST_WIDE_INT
16324 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16325 {
16326 arm_stack_offsets *offsets;
16327
16328 offsets = arm_get_frame_offsets ();
16329
16330 /* OK, now we have enough information to compute the distances.
16331 There must be an entry in these switch tables for each pair
16332 of registers in ELIMINABLE_REGS, even if some of the entries
16333 seem to be redundant or useless. */
16334 switch (from)
16335 {
16336 case ARG_POINTER_REGNUM:
16337 switch (to)
16338 {
16339 case THUMB_HARD_FRAME_POINTER_REGNUM:
16340 return 0;
16341
16342 case FRAME_POINTER_REGNUM:
16343 /* This is the reverse of the soft frame pointer
16344 to hard frame pointer elimination below. */
16345 return offsets->soft_frame - offsets->saved_args;
16346
16347 case ARM_HARD_FRAME_POINTER_REGNUM:
16348 /* This is only non-zero in the case where the static chain register
16349 is stored above the frame. */
16350 return offsets->frame - offsets->saved_args - 4;
16351
16352 case STACK_POINTER_REGNUM:
16353 /* If nothing has been pushed on the stack at all
16354 then this will return -4. This *is* correct! */
16355 return offsets->outgoing_args - (offsets->saved_args + 4);
16356
16357 default:
16358 gcc_unreachable ();
16359 }
16360 gcc_unreachable ();
16361
16362 case FRAME_POINTER_REGNUM:
16363 switch (to)
16364 {
16365 case THUMB_HARD_FRAME_POINTER_REGNUM:
16366 return 0;
16367
16368 case ARM_HARD_FRAME_POINTER_REGNUM:
16369 /* The hard frame pointer points to the top entry in the
16370 stack frame. The soft frame pointer to the bottom entry
16371 in the stack frame. If there is no stack frame at all,
16372 then they are identical. */
16373
16374 return offsets->frame - offsets->soft_frame;
16375
16376 case STACK_POINTER_REGNUM:
16377 return offsets->outgoing_args - offsets->soft_frame;
16378
16379 default:
16380 gcc_unreachable ();
16381 }
16382 gcc_unreachable ();
16383
16384 default:
16385 /* You cannot eliminate from the stack pointer.
16386 In theory you could eliminate from the hard frame
16387 pointer to the stack pointer, but this will never
16388 happen, since if a stack frame is not needed the
16389 hard frame pointer will never be used. */
16390 gcc_unreachable ();
16391 }
16392 }
16393
16394 /* Given FROM and TO register numbers, say whether this elimination is
16395 allowed. Frame pointer elimination is automatically handled.
16396
16397 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
16398 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
16399 pointer, we must eliminate FRAME_POINTER_REGNUM into
16400 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
16401 ARG_POINTER_REGNUM. */
16402
16403 bool
16404 arm_can_eliminate (const int from, const int to)
16405 {
16406 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
16407 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
16408 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
16409 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
16410 true);
16411 }
16412
16413 /* Emit RTL to save coprocessor registers on function entry. Returns the
16414 number of bytes pushed. */
16415
16416 static int
16417 arm_save_coproc_regs (void)
16418 {
16419 int saved_size = 0;
16420 unsigned reg;
16421 unsigned start_reg;
16422 rtx insn;
16423
16424 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
16425 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
16426 {
16427 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16428 insn = gen_rtx_MEM (V2SImode, insn);
16429 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
16430 RTX_FRAME_RELATED_P (insn) = 1;
16431 saved_size += 8;
16432 }
16433
16434 if (TARGET_HARD_FLOAT && TARGET_VFP)
16435 {
16436 start_reg = FIRST_VFP_REGNUM;
16437
16438 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
16439 {
16440 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
16441 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
16442 {
16443 if (start_reg != reg)
16444 saved_size += vfp_emit_fstmd (start_reg,
16445 (reg - start_reg) / 2);
16446 start_reg = reg + 2;
16447 }
16448 }
16449 if (start_reg != reg)
16450 saved_size += vfp_emit_fstmd (start_reg,
16451 (reg - start_reg) / 2);
16452 }
16453 return saved_size;
16454 }
16455
16456
16457 /* Set the Thumb frame pointer from the stack pointer. */
16458
16459 static void
16460 thumb_set_frame_pointer (arm_stack_offsets *offsets)
16461 {
16462 HOST_WIDE_INT amount;
16463 rtx insn, dwarf;
16464
16465 amount = offsets->outgoing_args - offsets->locals_base;
16466 if (amount < 1024)
16467 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16468 stack_pointer_rtx, GEN_INT (amount)));
16469 else
16470 {
16471 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
16472 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
16473 expects the first two operands to be the same. */
16474 if (TARGET_THUMB2)
16475 {
16476 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16477 stack_pointer_rtx,
16478 hard_frame_pointer_rtx));
16479 }
16480 else
16481 {
16482 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16483 hard_frame_pointer_rtx,
16484 stack_pointer_rtx));
16485 }
16486 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
16487 plus_constant (Pmode, stack_pointer_rtx, amount));
16488 RTX_FRAME_RELATED_P (dwarf) = 1;
16489 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16490 }
16491
16492 RTX_FRAME_RELATED_P (insn) = 1;
16493 }
16494
16495 /* Generate the prologue instructions for entry into an ARM or Thumb-2
16496 function. */
16497 void
16498 arm_expand_prologue (void)
16499 {
16500 rtx amount;
16501 rtx insn;
16502 rtx ip_rtx;
16503 unsigned long live_regs_mask;
16504 unsigned long func_type;
16505 int fp_offset = 0;
16506 int saved_pretend_args = 0;
16507 int saved_regs = 0;
16508 unsigned HOST_WIDE_INT args_to_push;
16509 arm_stack_offsets *offsets;
16510
16511 func_type = arm_current_func_type ();
16512
16513 /* Naked functions don't have prologues. */
16514 if (IS_NAKED (func_type))
16515 return;
16516
16517 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
16518 args_to_push = crtl->args.pretend_args_size;
16519
16520 /* Compute which register we will have to save onto the stack. */
16521 offsets = arm_get_frame_offsets ();
16522 live_regs_mask = offsets->saved_regs_mask;
16523
16524 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
16525
16526 if (IS_STACKALIGN (func_type))
16527 {
16528 rtx r0, r1;
16529
16530 /* Handle a word-aligned stack pointer. We generate the following:
16531
16532 mov r0, sp
16533 bic r1, r0, #7
16534 mov sp, r1
16535 <save and restore r0 in normal prologue/epilogue>
16536 mov sp, r0
16537 bx lr
16538
16539 The unwinder doesn't need to know about the stack realignment.
16540 Just tell it we saved SP in r0. */
16541 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
16542
16543 r0 = gen_rtx_REG (SImode, 0);
16544 r1 = gen_rtx_REG (SImode, 1);
16545
16546 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
16547 RTX_FRAME_RELATED_P (insn) = 1;
16548 add_reg_note (insn, REG_CFA_REGISTER, NULL);
16549
16550 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
16551
16552 /* ??? The CFA changes here, which may cause GDB to conclude that it
16553 has entered a different function. That said, the unwind info is
16554 correct, individually, before and after this instruction because
16555 we've described the save of SP, which will override the default
16556 handling of SP as restoring from the CFA. */
16557 emit_insn (gen_movsi (stack_pointer_rtx, r1));
16558 }
16559
16560 /* For APCS frames, if the IP register is clobbered
16561 when creating the frame, save that register in a
16562 special way. */
16563 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
16564 {
16565 if (IS_INTERRUPT (func_type))
16566 {
16567 /* Interrupt functions must not corrupt any registers.
16568 Creating a frame pointer however, corrupts the IP
16569 register, so we must push it first. */
16570 emit_multi_reg_push (1 << IP_REGNUM);
16571
16572 /* Do not set RTX_FRAME_RELATED_P on this insn.
16573 The dwarf stack unwinding code only wants to see one
16574 stack decrement per function, and this is not it. If
16575 this instruction is labeled as being part of the frame
16576 creation sequence then dwarf2out_frame_debug_expr will
16577 die when it encounters the assignment of IP to FP
16578 later on, since the use of SP here establishes SP as
16579 the CFA register and not IP.
16580
16581 Anyway this instruction is not really part of the stack
16582 frame creation although it is part of the prologue. */
16583 }
16584 else if (IS_NESTED (func_type))
16585 {
16586 /* The static chain register is the same as the IP register,
16587 which is used as a scratch register during stack frame creation.
16588 To get around this we need to find somewhere to store IP
16589 whilst the frame is being created. We try the following
16590 places in order:
16591
16592 1. The last argument register.
16593 2. A slot on the stack above the frame. (This only
16594 works if the function is not a varargs function).
16595 3. Register r3, after pushing the argument registers
16596 onto the stack.
16597
16598 Note - we only need to tell the dwarf2 backend about the SP
16599 adjustment in the second variant; the static chain register
16600 doesn't need to be unwound, as it doesn't contain a value
16601 inherited from the caller. */
16602
16603 if (df_regs_ever_live_p (3) == false)
16604 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16605 else if (args_to_push == 0)
16606 {
16607 rtx dwarf;
16608
16609 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
16610 saved_regs += 4;
16611
16612 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
16613 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
16614 fp_offset = 4;
16615
16616 /* Just tell the dwarf backend that we adjusted SP. */
16617 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16618 plus_constant (Pmode, stack_pointer_rtx,
16619 -fp_offset));
16620 RTX_FRAME_RELATED_P (insn) = 1;
16621 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16622 }
16623 else
16624 {
16625 /* Store the args on the stack. */
16626 if (cfun->machine->uses_anonymous_args)
16627 insn = emit_multi_reg_push
16628 ((0xf0 >> (args_to_push / 4)) & 0xf);
16629 else
16630 insn = emit_insn
16631 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16632 GEN_INT (- args_to_push)));
16633
16634 RTX_FRAME_RELATED_P (insn) = 1;
16635
16636 saved_pretend_args = 1;
16637 fp_offset = args_to_push;
16638 args_to_push = 0;
16639
16640 /* Now reuse r3 to preserve IP. */
16641 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16642 }
16643 }
16644
16645 insn = emit_set_insn (ip_rtx,
16646 plus_constant (Pmode, stack_pointer_rtx,
16647 fp_offset));
16648 RTX_FRAME_RELATED_P (insn) = 1;
16649 }
16650
16651 if (args_to_push)
16652 {
16653 /* Push the argument registers, or reserve space for them. */
16654 if (cfun->machine->uses_anonymous_args)
16655 insn = emit_multi_reg_push
16656 ((0xf0 >> (args_to_push / 4)) & 0xf);
16657 else
16658 insn = emit_insn
16659 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16660 GEN_INT (- args_to_push)));
16661 RTX_FRAME_RELATED_P (insn) = 1;
16662 }
16663
16664 /* If this is an interrupt service routine, and the link register
16665 is going to be pushed, and we're not generating an extra push
16666 of IP (needed when a frame pointer is needed and the frame
16667 layout is APCS), subtracting four from LR now will mean that
16668 the function return can be done with a single instruction. */
16669 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
16670 && (live_regs_mask & (1 << LR_REGNUM)) != 0
16671 && !(frame_pointer_needed && TARGET_APCS_FRAME)
16672 && TARGET_ARM)
16673 {
16674 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
16675
16676 emit_set_insn (lr, plus_constant (SImode, lr, -4));
16677 }
16678
16679 if (live_regs_mask)
16680 {
16681 saved_regs += bit_count (live_regs_mask) * 4;
16682 if (optimize_size && !frame_pointer_needed
16683 && saved_regs == offsets->saved_regs - offsets->saved_args)
16684 {
16685 /* If no coprocessor registers are being pushed and we don't have
16686 to worry about a frame pointer then push extra registers to
16687 create the stack frame. This is done in a way that does not
16688 alter the frame layout, so is independent of the epilogue. */
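	  /* For example, if the frame needs a further 8 bytes and r0 and
	     r1 hold no live values, adding them to the push mask creates
	     the frame without a separate adjustment of the stack
	     pointer.  */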
16689 int n;
16690 int frame;
16691 n = 0;
16692 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
16693 n++;
16694 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
16695 if (frame && n * 4 >= frame)
16696 {
16697 n = frame / 4;
16698 live_regs_mask |= (1 << n) - 1;
16699 saved_regs += frame;
16700 }
16701 }
16702 insn = emit_multi_reg_push (live_regs_mask);
16703 RTX_FRAME_RELATED_P (insn) = 1;
16704 }
16705
16706 if (! IS_VOLATILE (func_type))
16707 saved_regs += arm_save_coproc_regs ();
16708
16709 if (frame_pointer_needed && TARGET_ARM)
16710 {
16711 /* Create the new frame pointer. */
16712 if (TARGET_APCS_FRAME)
16713 {
16714 insn = GEN_INT (-(4 + args_to_push + fp_offset));
16715 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
16716 RTX_FRAME_RELATED_P (insn) = 1;
16717
16718 if (IS_NESTED (func_type))
16719 {
16720 /* Recover the static chain register. */
16721 if (!df_regs_ever_live_p (3)
16722 || saved_pretend_args)
16723 insn = gen_rtx_REG (SImode, 3);
16724 else /* if (crtl->args.pretend_args_size == 0) */
16725 {
16726 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
16727 insn = gen_frame_mem (SImode, insn);
16728 }
16729 emit_set_insn (ip_rtx, insn);
16730 /* Add a USE to stop propagate_one_insn() from barfing. */
16731 emit_insn (gen_prologue_use (ip_rtx));
16732 }
16733 }
16734 else
16735 {
16736 insn = GEN_INT (saved_regs - 4);
16737 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16738 stack_pointer_rtx, insn));
16739 RTX_FRAME_RELATED_P (insn) = 1;
16740 }
16741 }
16742
16743 if (flag_stack_usage_info)
16744 current_function_static_stack_size
16745 = offsets->outgoing_args - offsets->saved_args;
16746
16747 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
16748 {
16749 /* This add can produce multiple insns for a large constant, so we
16750 need to get tricky. */
16751 rtx last = get_last_insn ();
16752
16753 amount = GEN_INT (offsets->saved_args + saved_regs
16754 - offsets->outgoing_args);
16755
16756 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16757 amount));
16758 do
16759 {
16760 last = last ? NEXT_INSN (last) : get_insns ();
16761 RTX_FRAME_RELATED_P (last) = 1;
16762 }
16763 while (last != insn);
16764
16765 /* If the frame pointer is needed, emit a special barrier that
16766 will prevent the scheduler from moving stores to the frame
16767 before the stack adjustment. */
16768 if (frame_pointer_needed)
16769 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
16770 hard_frame_pointer_rtx));
16771 }
16772
16773
16774 if (frame_pointer_needed && TARGET_THUMB2)
16775 thumb_set_frame_pointer (offsets);
16776
16777 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16778 {
16779 unsigned long mask;
16780
16781 mask = live_regs_mask;
16782 mask &= THUMB2_WORK_REGS;
16783 if (!IS_NESTED (func_type))
16784 mask |= (1 << IP_REGNUM);
16785 arm_load_pic_register (mask);
16786 }
16787
16788 /* If we are profiling, make sure no instructions are scheduled before
16789 the call to mcount. Similarly if the user has requested no
16790 scheduling in the prolog. Similarly if we want non-call exceptions
16791 using the EABI unwinder, to prevent faulting instructions from being
16792 swapped with a stack adjustment. */
16793 if (crtl->profile || !TARGET_SCHED_PROLOG
16794 || (arm_except_unwind_info (&global_options) == UI_TARGET
16795 && cfun->can_throw_non_call_exceptions))
16796 emit_insn (gen_blockage ());
16797
16798 /* If the link register is being kept alive, with the return address in it,
16799 then make sure that it does not get reused by the ce2 pass. */
16800 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
16801 cfun->machine->lr_save_eliminated = 1;
16802 }
16803 \f
16804 /* Print condition code to STREAM. Helper function for arm_print_operand. */
16805 static void
16806 arm_print_condition (FILE *stream)
16807 {
16808 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
16809 {
16810 /* Branch conversion is not implemented for Thumb-2. */
16811 if (TARGET_THUMB)
16812 {
16813 output_operand_lossage ("predicated Thumb instruction");
16814 return;
16815 }
16816 if (current_insn_predicate != NULL)
16817 {
16818 output_operand_lossage
16819 ("predicated instruction in conditional sequence");
16820 return;
16821 }
16822
16823 fputs (arm_condition_codes[arm_current_cc], stream);
16824 }
16825 else if (current_insn_predicate)
16826 {
16827 enum arm_cond_code code;
16828
16829 if (TARGET_THUMB1)
16830 {
16831 output_operand_lossage ("predicated Thumb instruction");
16832 return;
16833 }
16834
16835 code = get_arm_condition_code (current_insn_predicate);
16836 fputs (arm_condition_codes[code], stream);
16837 }
16838 }
16839
16840
16841 /* If CODE is 'd', then X is a condition operand and the instruction
16842 should only be executed if the condition is true.
16843 If CODE is 'D', then X is a condition operand and the instruction
16844 should only be executed if the condition is false: however, if the mode
16845 of the comparison is CCFPEmode, then always execute the instruction -- we
16846 do this because in these circumstances !GE does not necessarily imply LT;
16847 in these cases the instruction pattern will take care to make sure that
16848 an instruction containing %d will follow, thereby undoing the effects of
16849 doing this instruction unconditionally.
16850 If CODE is 'N' then X is a floating point operand that must be negated
16851 before output.
16852 If CODE is 'B' then output a bitwise inverted value of X (a const int).
16853 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
16854 static void
16855 arm_print_operand (FILE *stream, rtx x, int code)
16856 {
16857 switch (code)
16858 {
16859 case '@':
16860 fputs (ASM_COMMENT_START, stream);
16861 return;
16862
16863 case '_':
16864 fputs (user_label_prefix, stream);
16865 return;
16866
16867 case '|':
16868 fputs (REGISTER_PREFIX, stream);
16869 return;
16870
16871 case '?':
16872 arm_print_condition (stream);
16873 return;
16874
16875 case '(':
16876 /* Nothing in unified syntax, otherwise the current condition code. */
16877 if (!TARGET_UNIFIED_ASM)
16878 arm_print_condition (stream);
16879 break;
16880
16881 case ')':
16882 /* The current condition code in unified syntax, otherwise nothing. */
16883 if (TARGET_UNIFIED_ASM)
16884 arm_print_condition (stream);
16885 break;
16886
16887 case '.':
16888 /* The current condition code for a condition code setting instruction.
16889 Preceded by 's' in unified syntax, otherwise followed by 's'. */
16890 if (TARGET_UNIFIED_ASM)
16891 {
16892 fputc('s', stream);
16893 arm_print_condition (stream);
16894 }
16895 else
16896 {
16897 arm_print_condition (stream);
16898 fputc('s', stream);
16899 }
16900 return;
16901
16902 case '!':
16903 /* If the instruction is conditionally executed then print
16904 the current condition code, otherwise print 's'. */
16905 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
16906 if (current_insn_predicate)
16907 arm_print_condition (stream);
16908 else
16909 fputc('s', stream);
16910 break;
16911
16912 /* %# is a "break" sequence. It doesn't output anything, but is used to
16913 separate e.g. operand numbers from following text, if that text consists
16914 of further digits which we don't want to be part of the operand
16915 number. */
16916 case '#':
16917 return;
16918
16919 case 'N':
16920 {
16921 REAL_VALUE_TYPE r;
16922 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16923 r = real_value_negate (&r);
16924 fprintf (stream, "%s", fp_const_from_val (&r));
16925 }
16926 return;
16927
16928 /* An integer or symbol address without a preceding # sign. */
16929 case 'c':
16930 switch (GET_CODE (x))
16931 {
16932 case CONST_INT:
16933 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16934 break;
16935
16936 case SYMBOL_REF:
16937 output_addr_const (stream, x);
16938 break;
16939
16940 case CONST:
16941 if (GET_CODE (XEXP (x, 0)) == PLUS
16942 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
16943 {
16944 output_addr_const (stream, x);
16945 break;
16946 }
16947 /* Fall through. */
16948
16949 default:
16950 output_operand_lossage ("Unsupported operand for code '%c'", code);
16951 }
16952 return;
16953
16954 /* An integer that we want to print in HEX. */
16955 case 'x':
16956 switch (GET_CODE (x))
16957 {
16958 case CONST_INT:
16959 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16960 break;
16961
16962 default:
16963 output_operand_lossage ("Unsupported operand for code '%c'", code);
16964 }
16965 return;
16966
16967 case 'B':
16968 if (CONST_INT_P (x))
16969 {
16970 HOST_WIDE_INT val;
16971 val = ARM_SIGN_EXTEND (~INTVAL (x));
16972 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
16973 }
16974 else
16975 {
16976 putc ('~', stream);
16977 output_addr_const (stream, x);
16978 }
16979 return;
16980
16981 case 'L':
16982 /* The low 16 bits of an immediate constant. */
16983 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
16984 return;
16985
16986 case 'i':
16987 fprintf (stream, "%s", arithmetic_instr (x, 1));
16988 return;
16989
16990 case 'I':
16991 fprintf (stream, "%s", arithmetic_instr (x, 0));
16992 return;
16993
16994 case 'S':
16995 {
16996 HOST_WIDE_INT val;
16997 const char *shift;
16998
16999 if (!shift_operator (x, SImode))
17000 {
17001 output_operand_lossage ("invalid shift operand");
17002 break;
17003 }
17004
17005 shift = shift_op (x, &val);
17006
17007 if (shift)
17008 {
17009 fprintf (stream, ", %s ", shift);
17010 if (val == -1)
17011 arm_print_operand (stream, XEXP (x, 1), 0);
17012 else
17013 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
17014 }
17015 }
17016 return;
17017
17018 /* An explanation of the 'Q', 'R' and 'H' register operands:
17019
17020 In a pair of registers containing a DI or DF value the 'Q'
17021 operand returns the register number of the register containing
17022 the least significant part of the value. The 'R' operand returns
17023 the register number of the register containing the most
17024 significant part of the value.
17025
17026 The 'H' operand returns the higher of the two register numbers.
17027 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
17028 same as the 'Q' operand, since the most significant part of the
17029 value is held in the lower-numbered register. The reverse is true
17030 on systems where WORDS_BIG_ENDIAN is false.
17031
17032 The purpose of these operands is to distinguish between cases
17033 where the endian-ness of the values is important (for example
17034 when they are added together), and cases where the endian-ness
17035 is irrelevant, but the order of register operations is important.
17036 For example when loading a value from memory into a register
17037 pair, the endian-ness does not matter. Provided that the value
17038 from the lower memory address is put into the lower numbered
17039 register, and the value from the higher address is put into the
17040 higher numbered register, the load will work regardless of whether
17041 the value being loaded is big-wordian or little-wordian. The
17042 order of the two register loads can matter however, if the address
17043 of the memory location is actually held in one of the registers
17044 being overwritten by the load.
17045
17046 The 'Q' and 'R' constraints are also available for 64-bit
17047 constants. */
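    /* As an illustration, for a DImode value held in {r4, r5} on a
       target where WORDS_BIG_ENDIAN is false, 'Q' prints r4, 'R'
       prints r5 and 'H' also prints r5.  */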
17048 case 'Q':
17049 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
17050 {
17051 rtx part = gen_lowpart (SImode, x);
17052 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17053 return;
17054 }
17055
17056 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17057 {
17058 output_operand_lossage ("invalid operand for code '%c'", code);
17059 return;
17060 }
17061
17062 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
17063 return;
17064
17065 case 'R':
17066 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
17067 {
17068 enum machine_mode mode = GET_MODE (x);
17069 rtx part;
17070
17071 if (mode == VOIDmode)
17072 mode = DImode;
17073 part = gen_highpart_mode (SImode, mode, x);
17074 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17075 return;
17076 }
17077
17078 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17079 {
17080 output_operand_lossage ("invalid operand for code '%c'", code);
17081 return;
17082 }
17083
17084 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
17085 return;
17086
17087 case 'H':
17088 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17089 {
17090 output_operand_lossage ("invalid operand for code '%c'", code);
17091 return;
17092 }
17093
17094 asm_fprintf (stream, "%r", REGNO (x) + 1);
17095 return;
17096
17097 case 'J':
17098 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17099 {
17100 output_operand_lossage ("invalid operand for code '%c'", code);
17101 return;
17102 }
17103
17104 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
17105 return;
17106
17107 case 'K':
17108 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17109 {
17110 output_operand_lossage ("invalid operand for code '%c'", code);
17111 return;
17112 }
17113
17114 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
17115 return;
17116
17117 case 'm':
17118 asm_fprintf (stream, "%r",
17119 REG_P (XEXP (x, 0))
17120 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
17121 return;
17122
17123 case 'M':
17124 asm_fprintf (stream, "{%r-%r}",
17125 REGNO (x),
17126 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
17127 return;
17128
17129 /* Like 'M', but writing doubleword vector registers, for use by Neon
17130 insns. */
17131 case 'h':
17132 {
17133 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
17134 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
17135 if (numregs == 1)
17136 asm_fprintf (stream, "{d%d}", regno);
17137 else
17138 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
17139 }
17140 return;
17141
17142 case 'd':
17143 /* CONST_TRUE_RTX means always -- that's the default. */
17144 if (x == const_true_rtx)
17145 return;
17146
17147 if (!COMPARISON_P (x))
17148 {
17149 output_operand_lossage ("invalid operand for code '%c'", code);
17150 return;
17151 }
17152
17153 fputs (arm_condition_codes[get_arm_condition_code (x)],
17154 stream);
17155 return;
17156
17157 case 'D':
17158 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
17159 want to do that. */
17160 if (x == const_true_rtx)
17161 {
17162 output_operand_lossage ("instruction never executed");
17163 return;
17164 }
17165 if (!COMPARISON_P (x))
17166 {
17167 output_operand_lossage ("invalid operand for code '%c'", code);
17168 return;
17169 }
17170
17171 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
17172 (get_arm_condition_code (x))],
17173 stream);
17174 return;
17175
17176 case 's':
17177 case 'V':
17178 case 'W':
17179 case 'X':
17180 case 'Y':
17181 case 'Z':
17182 /* Former Maverick support, removed after GCC-4.7. */
17183 output_operand_lossage ("obsolete Maverick format code '%c'", code);
17184 return;
17185
17186 case 'U':
17187 if (!REG_P (x)
17188 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
17189 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
17190 /* Bad value for wCG register number. */
17191 {
17192 output_operand_lossage ("invalid operand for code '%c'", code);
17193 return;
17194 }
17195
17196 else
17197 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
17198 return;
17199
17200 /* Print an iWMMXt control register name. */
17201 case 'w':
17202 if (!CONST_INT_P (x)
17203 || INTVAL (x) < 0
17204 || INTVAL (x) >= 16)
17205 /* Bad value for wC register number. */
17206 {
17207 output_operand_lossage ("invalid operand for code '%c'", code);
17208 return;
17209 }
17210
17211 else
17212 {
17213 static const char * wc_reg_names [16] =
17214 {
17215 "wCID", "wCon", "wCSSF", "wCASF",
17216 "wC4", "wC5", "wC6", "wC7",
17217 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
17218 "wC12", "wC13", "wC14", "wC15"
17219 };
17220
17221 fprintf (stream, "%s", wc_reg_names [INTVAL (x)]);
17222 }
17223 return;
17224
17225 /* Print the high single-precision register of a VFP double-precision
17226 register. */
17227 case 'p':
17228 {
17229 int mode = GET_MODE (x);
17230 int regno;
17231
17232 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
17233 {
17234 output_operand_lossage ("invalid operand for code '%c'", code);
17235 return;
17236 }
17237
17238 regno = REGNO (x);
17239 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
17240 {
17241 output_operand_lossage ("invalid operand for code '%c'", code);
17242 return;
17243 }
17244
17245 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
17246 }
17247 return;
17248
17249 /* Print a VFP/Neon double precision or quad precision register name. */
17250 case 'P':
17251 case 'q':
17252 {
17253 int mode = GET_MODE (x);
17254 int is_quad = (code == 'q');
17255 int regno;
17256
17257 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
17258 {
17259 output_operand_lossage ("invalid operand for code '%c'", code);
17260 return;
17261 }
17262
17263 if (!REG_P (x)
17264 || !IS_VFP_REGNUM (REGNO (x)))
17265 {
17266 output_operand_lossage ("invalid operand for code '%c'", code);
17267 return;
17268 }
17269
17270 regno = REGNO (x);
17271 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
17272 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
17273 {
17274 output_operand_lossage ("invalid operand for code '%c'", code);
17275 return;
17276 }
17277
17278 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
17279 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
17280 }
17281 return;
17282
17283 /* These two codes print the low/high doubleword register of a Neon quad
17284 register, respectively. For pair-structure types, can also print
17285 low/high quadword registers. */
17286 case 'e':
17287 case 'f':
17288 {
17289 int mode = GET_MODE (x);
17290 int regno;
17291
17292 if ((GET_MODE_SIZE (mode) != 16
17293 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
17294 {
17295 output_operand_lossage ("invalid operand for code '%c'", code);
17296 return;
17297 }
17298
17299 regno = REGNO (x);
17300 if (!NEON_REGNO_OK_FOR_QUAD (regno))
17301 {
17302 output_operand_lossage ("invalid operand for code '%c'", code);
17303 return;
17304 }
17305
17306 if (GET_MODE_SIZE (mode) == 16)
17307 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
17308 + (code == 'f' ? 1 : 0));
17309 else
17310 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
17311 + (code == 'f' ? 1 : 0));
17312 }
17313 return;
17314
17315 /* Print a VFPv3 floating-point constant, represented as an integer
17316 index. */
17317 case 'G':
17318 {
17319 int index = vfp3_const_double_index (x);
17320 gcc_assert (index != -1);
17321 fprintf (stream, "%d", index);
17322 }
17323 return;
17324
17325 /* Print bits representing opcode features for Neon.
17326
17327 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
17328 and polynomials as unsigned.
17329
17330 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
17331
17332 Bit 2 is 1 for rounding functions, 0 otherwise. */
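 
    For example, a value of 5 (binary 101, signed and rounding) makes
    'T' print 's' and 'O' print 'r'.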
17333
17334 /* Identify the type as 's', 'u', 'p' or 'f'. */
17335 case 'T':
17336 {
17337 HOST_WIDE_INT bits = INTVAL (x);
17338 fputc ("uspf"[bits & 3], stream);
17339 }
17340 return;
17341
17342 /* Likewise, but signed and unsigned integers are both 'i'. */
17343 case 'F':
17344 {
17345 HOST_WIDE_INT bits = INTVAL (x);
17346 fputc ("iipf"[bits & 3], stream);
17347 }
17348 return;
17349
17350 /* As for 'T', but emit 'u' instead of 'p'. */
17351 case 't':
17352 {
17353 HOST_WIDE_INT bits = INTVAL (x);
17354 fputc ("usuf"[bits & 3], stream);
17355 }
17356 return;
17357
17358 /* Bit 2: rounding (vs none). */
17359 case 'O':
17360 {
17361 HOST_WIDE_INT bits = INTVAL (x);
17362 fputs ((bits & 4) != 0 ? "r" : "", stream);
17363 }
17364 return;
17365
17366 /* Memory operand for vld1/vst1 instruction. */
17367 case 'A':
17368 {
17369 rtx addr;
17370 bool postinc = false;
17371 unsigned align, memsize, align_bits;
17372
17373 gcc_assert (MEM_P (x));
17374 addr = XEXP (x, 0);
17375 if (GET_CODE (addr) == POST_INC)
17376 {
17377 postinc = true;
17378 addr = XEXP (addr, 0);
17379 }
17380 asm_fprintf (stream, "[%r", REGNO (addr));
17381
17382 /* We know the alignment of this access, so we can emit a hint in the
17383 instruction (for some alignments) as an aid to the memory subsystem
17384 of the target. */
17385 align = MEM_ALIGN (x) >> 3;
17386 memsize = MEM_SIZE (x);
17387
17388 /* Only certain alignment specifiers are supported by the hardware. */
17389 if (memsize == 32 && (align % 32) == 0)
17390 align_bits = 256;
17391 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
17392 align_bits = 128;
17393 else if (memsize >= 8 && (align % 8) == 0)
17394 align_bits = 64;
17395 else
17396 align_bits = 0;
17397
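	/* When a hint applies it is appended to the base register, so a
	   16-byte access from a 128-bit aligned address prints as, for
	   example, "[r0:128]".  */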
17398 if (align_bits != 0)
17399 asm_fprintf (stream, ":%d", align_bits);
17400
17401 asm_fprintf (stream, "]");
17402
17403 if (postinc)
17404 fputs("!", stream);
17405 }
17406 return;
17407
17408 case 'C':
17409 {
17410 rtx addr;
17411
17412 gcc_assert (MEM_P (x));
17413 addr = XEXP (x, 0);
17414 gcc_assert (REG_P (addr));
17415 asm_fprintf (stream, "[%r]", REGNO (addr));
17416 }
17417 return;
17418
17419 /* Translate an S register number into a D register number and element index. */
17420 case 'y':
17421 {
17422 int mode = GET_MODE (x);
17423 int regno;
17424
17425 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
17426 {
17427 output_operand_lossage ("invalid operand for code '%c'", code);
17428 return;
17429 }
17430
17431 regno = REGNO (x);
17432 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17433 {
17434 output_operand_lossage ("invalid operand for code '%c'", code);
17435 return;
17436 }
17437
17438 regno = regno - FIRST_VFP_REGNUM;
17439 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
17440 }
17441 return;
17442
17443 case 'v':
17444 gcc_assert (CONST_DOUBLE_P (x));
17445 fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
17446 return;
17447
17448 /* Register specifier for vld1.16/vst1.16. Translate the S register
17449 number into a D register number and element index. */
17450 case 'z':
17451 {
17452 int mode = GET_MODE (x);
17453 int regno;
17454
17455 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
17456 {
17457 output_operand_lossage ("invalid operand for code '%c'", code);
17458 return;
17459 }
17460
17461 regno = REGNO (x);
17462 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17463 {
17464 output_operand_lossage ("invalid operand for code '%c'", code);
17465 return;
17466 }
17467
17468 regno = regno - FIRST_VFP_REGNUM;
17469 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
17470 }
17471 return;
17472
17473 default:
17474 if (x == 0)
17475 {
17476 output_operand_lossage ("missing operand");
17477 return;
17478 }
17479
17480 switch (GET_CODE (x))
17481 {
17482 case REG:
17483 asm_fprintf (stream, "%r", REGNO (x));
17484 break;
17485
17486 case MEM:
17487 output_memory_reference_mode = GET_MODE (x);
17488 output_address (XEXP (x, 0));
17489 break;
17490
17491 case CONST_DOUBLE:
17492 if (TARGET_NEON)
17493 {
17494 char fpstr[20];
17495 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17496 sizeof (fpstr), 0, 1);
17497 fprintf (stream, "#%s", fpstr);
17498 }
17499 else
17500 fprintf (stream, "#%s", fp_immediate_constant (x));
17501 break;
17502
17503 default:
17504 gcc_assert (GET_CODE (x) != NEG);
17505 fputc ('#', stream);
17506 if (GET_CODE (x) == HIGH)
17507 {
17508 fputs (":lower16:", stream);
17509 x = XEXP (x, 0);
17510 }
17511
17512 output_addr_const (stream, x);
17513 break;
17514 }
17515 }
17516 }
17517 \f
17518 /* Target hook for printing a memory address. */
17519 static void
17520 arm_print_operand_address (FILE *stream, rtx x)
17521 {
17522 if (TARGET_32BIT)
17523 {
17524 int is_minus = GET_CODE (x) == MINUS;
17525
17526 if (REG_P (x))
17527 asm_fprintf (stream, "[%r]", REGNO (x));
17528 else if (GET_CODE (x) == PLUS || is_minus)
17529 {
17530 rtx base = XEXP (x, 0);
17531 rtx index = XEXP (x, 1);
17532 HOST_WIDE_INT offset = 0;
17533 if (!REG_P (base)
17534 || (REG_P (index) && REGNO (index) == SP_REGNUM))
17535 {
17536 /* Ensure that BASE is a register. */
17537 /* (one of them must be). */
17538 /* Also ensure the SP is not used as an index register. */
17539 rtx temp = base;
17540 base = index;
17541 index = temp;
17542 }
17543 switch (GET_CODE (index))
17544 {
17545 case CONST_INT:
17546 offset = INTVAL (index);
17547 if (is_minus)
17548 offset = -offset;
17549 asm_fprintf (stream, "[%r, #%wd]",
17550 REGNO (base), offset);
17551 break;
17552
17553 case REG:
17554 asm_fprintf (stream, "[%r, %s%r]",
17555 REGNO (base), is_minus ? "-" : "",
17556 REGNO (index));
17557 break;
17558
17559 case MULT:
17560 case ASHIFTRT:
17561 case LSHIFTRT:
17562 case ASHIFT:
17563 case ROTATERT:
17564 {
17565 asm_fprintf (stream, "[%r, %s%r",
17566 REGNO (base), is_minus ? "-" : "",
17567 REGNO (XEXP (index, 0)));
17568 arm_print_operand (stream, index, 'S');
17569 fputs ("]", stream);
17570 break;
17571 }
17572
17573 default:
17574 gcc_unreachable ();
17575 }
17576 }
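      /* Pre/post increment and decrement addresses print in the usual
	 ARM syntax; for instance a PRE_DEC access to an SImode value
	 through r2 comes out as "[r2, #-4]!" and a POST_INC access as
	 "[r2], #4".  */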
17577 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
17578 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
17579 {
17580 extern enum machine_mode output_memory_reference_mode;
17581
17582 gcc_assert (REG_P (XEXP (x, 0)));
17583
17584 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
17585 asm_fprintf (stream, "[%r, #%s%d]!",
17586 REGNO (XEXP (x, 0)),
17587 GET_CODE (x) == PRE_DEC ? "-" : "",
17588 GET_MODE_SIZE (output_memory_reference_mode));
17589 else
17590 asm_fprintf (stream, "[%r], #%s%d",
17591 REGNO (XEXP (x, 0)),
17592 GET_CODE (x) == POST_DEC ? "-" : "",
17593 GET_MODE_SIZE (output_memory_reference_mode));
17594 }
17595 else if (GET_CODE (x) == PRE_MODIFY)
17596 {
17597 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
17598 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
17599 asm_fprintf (stream, "#%wd]!",
17600 INTVAL (XEXP (XEXP (x, 1), 1)));
17601 else
17602 asm_fprintf (stream, "%r]!",
17603 REGNO (XEXP (XEXP (x, 1), 1)));
17604 }
17605 else if (GET_CODE (x) == POST_MODIFY)
17606 {
17607 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
17608 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
17609 asm_fprintf (stream, "#%wd",
17610 INTVAL (XEXP (XEXP (x, 1), 1)));
17611 else
17612 asm_fprintf (stream, "%r",
17613 REGNO (XEXP (XEXP (x, 1), 1)));
17614 }
17615 else output_addr_const (stream, x);
17616 }
17617 else
17618 {
17619 if (REG_P (x))
17620 asm_fprintf (stream, "[%r]", REGNO (x));
17621 else if (GET_CODE (x) == POST_INC)
17622 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
17623 else if (GET_CODE (x) == PLUS)
17624 {
17625 gcc_assert (REG_P (XEXP (x, 0)));
17626 if (CONST_INT_P (XEXP (x, 1)))
17627 asm_fprintf (stream, "[%r, #%wd]",
17628 REGNO (XEXP (x, 0)),
17629 INTVAL (XEXP (x, 1)));
17630 else
17631 asm_fprintf (stream, "[%r, %r]",
17632 REGNO (XEXP (x, 0)),
17633 REGNO (XEXP (x, 1)));
17634 }
17635 else
17636 output_addr_const (stream, x);
17637 }
17638 }
17639 \f
17640 /* Target hook for indicating whether a punctuation character for
17641 TARGET_PRINT_OPERAND is valid. */
17642 static bool
17643 arm_print_operand_punct_valid_p (unsigned char code)
17644 {
17645 return (code == '@' || code == '|' || code == '.'
17646 || code == '(' || code == ')' || code == '#'
17647 || (TARGET_32BIT && (code == '?'))
17648 || (TARGET_THUMB2 && (code == '!'))
17649 || (TARGET_THUMB && (code == '_')));
17650 }
17651 \f
17652 /* Target hook for assembling integer objects. The ARM version needs to
17653 handle word-sized values specially. */
17654 static bool
17655 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
17656 {
17657 enum machine_mode mode;
17658
17659 if (size == UNITS_PER_WORD && aligned_p)
17660 {
17661 fputs ("\t.word\t", asm_out_file);
17662 output_addr_const (asm_out_file, x);
17663
17664 /* Mark symbols as position independent. We only do this in the
17665 .text segment, not in the .data segment. */
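	  /* For instance, a reference to a non-local symbol "foo" is
	     emitted as ".word foo(GOT)", while a local one uses
	     "(GOTOFF)".  */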
17666 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
17667 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
17668 {
17669 /* See legitimize_pic_address for an explanation of the
17670 TARGET_VXWORKS_RTP check. */
17671 if (TARGET_VXWORKS_RTP
17672 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
17673 fputs ("(GOT)", asm_out_file);
17674 else
17675 fputs ("(GOTOFF)", asm_out_file);
17676 }
17677 fputc ('\n', asm_out_file);
17678 return true;
17679 }
17680
17681 mode = GET_MODE (x);
17682
17683 if (arm_vector_mode_supported_p (mode))
17684 {
17685 int i, units;
17686
17687 gcc_assert (GET_CODE (x) == CONST_VECTOR);
17688
17689 units = CONST_VECTOR_NUNITS (x);
17690 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
17691
17692 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17693 for (i = 0; i < units; i++)
17694 {
17695 rtx elt = CONST_VECTOR_ELT (x, i);
17696 assemble_integer
17697 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
17698 }
17699 else
17700 for (i = 0; i < units; i++)
17701 {
17702 rtx elt = CONST_VECTOR_ELT (x, i);
17703 REAL_VALUE_TYPE rval;
17704
17705 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
17706
17707 assemble_real
17708 (rval, GET_MODE_INNER (mode),
17709 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
17710 }
17711
17712 return true;
17713 }
17714
17715 return default_assemble_integer (x, size, aligned_p);
17716 }
17717
17718 static void
17719 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
17720 {
17721 section *s;
17722
17723 if (!TARGET_AAPCS_BASED)
17724 {
17725 (is_ctor ?
17726 default_named_section_asm_out_constructor
17727 : default_named_section_asm_out_destructor) (symbol, priority);
17728 return;
17729 }
17730
17731 /* Put these in the .init_array section, using a special relocation. */
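  /* The "%.5u" format zero-pads the priority, so for example a
     constructor with priority 65 is placed in ".init_array.00065".  */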
17732 if (priority != DEFAULT_INIT_PRIORITY)
17733 {
17734 char buf[18];
17735 sprintf (buf, "%s.%.5u",
17736 is_ctor ? ".init_array" : ".fini_array",
17737 priority);
17738 s = get_section (buf, SECTION_WRITE, NULL_TREE);
17739 }
17740 else if (is_ctor)
17741 s = ctors_section;
17742 else
17743 s = dtors_section;
17744
17745 switch_to_section (s);
17746 assemble_align (POINTER_SIZE);
17747 fputs ("\t.word\t", asm_out_file);
17748 output_addr_const (asm_out_file, symbol);
17749 fputs ("(target1)\n", asm_out_file);
17750 }
17751
17752 /* Add a function to the list of static constructors. */
17753
17754 static void
17755 arm_elf_asm_constructor (rtx symbol, int priority)
17756 {
17757 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
17758 }
17759
17760 /* Add a function to the list of static destructors. */
17761
17762 static void
17763 arm_elf_asm_destructor (rtx symbol, int priority)
17764 {
17765 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
17766 }
17767 \f
17768 /* A finite state machine takes care of noticing whether or not instructions
17769 can be conditionally executed, and thus decrease execution time and code
17770 size by deleting branch instructions. The fsm is controlled by
17771 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
17772
17773 /* The states of the fsm controlling condition codes are:
17774 0: normal, do nothing special
17775 1: make ASM_OUTPUT_OPCODE not output this instruction
17776 2: make ASM_OUTPUT_OPCODE not output this instruction
17777 3: make instructions conditional
17778 4: make instructions conditional
17779
17780 State transitions (state->state by whom under condition):
17781 0 -> 1 final_prescan_insn if the `target' is a label
17782 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
17783 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
17784 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
17785 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
17786 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
17787 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
17788 (the target insn is arm_target_insn).
17789
17790 If the jump clobbers the conditions then we use states 2 and 4.
17791
17792 A similar thing can be done with conditional return insns.
17793
17794 XXX In case the `target' is an unconditional branch, this conditionalising
17795 of the instructions always reduces code size, but not always execution
17796 time. But then, I want to reduce the code size to somewhere near what
17797 /bin/cc produces. */
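 
 /* As an illustration (not taken from any particular input), the fsm
    lets final replace a short forward branch such as
 
	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
    .L1:
 
    with the predicated sequence
 
	cmp	r0, #0
	addne	r1, r1, #1
 
    by suppressing the branch and conditionalising the skipped
    instruction.  */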
17798
17799 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
17800 instructions. When a COND_EXEC instruction is seen the subsequent
17801 instructions are scanned so that multiple conditional instructions can be
17802 combined into a single IT block. arm_condexec_count and arm_condexec_mask
17803 specify the length and true/false mask for the IT block. These will be
17804 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
17805
17806 /* Returns the index of the ARM condition code string in
17807 `arm_condition_codes', or ARM_NV if the comparison is invalid.
17808 COMPARISON should be an rtx like `(eq (...) (...))'. */
17809
17810 enum arm_cond_code
17811 maybe_get_arm_condition_code (rtx comparison)
17812 {
17813 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
17814 enum arm_cond_code code;
17815 enum rtx_code comp_code = GET_CODE (comparison);
17816
17817 if (GET_MODE_CLASS (mode) != MODE_CC)
17818 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
17819 XEXP (comparison, 1));
17820
17821 switch (mode)
17822 {
17823 case CC_DNEmode: code = ARM_NE; goto dominance;
17824 case CC_DEQmode: code = ARM_EQ; goto dominance;
17825 case CC_DGEmode: code = ARM_GE; goto dominance;
17826 case CC_DGTmode: code = ARM_GT; goto dominance;
17827 case CC_DLEmode: code = ARM_LE; goto dominance;
17828 case CC_DLTmode: code = ARM_LT; goto dominance;
17829 case CC_DGEUmode: code = ARM_CS; goto dominance;
17830 case CC_DGTUmode: code = ARM_HI; goto dominance;
17831 case CC_DLEUmode: code = ARM_LS; goto dominance;
17832 case CC_DLTUmode: code = ARM_CC;
17833
17834 dominance:
17835 if (comp_code == EQ)
17836 return ARM_INVERSE_CONDITION_CODE (code);
17837 if (comp_code == NE)
17838 return code;
17839 return ARM_NV;
17840
17841 case CC_NOOVmode:
17842 switch (comp_code)
17843 {
17844 case NE: return ARM_NE;
17845 case EQ: return ARM_EQ;
17846 case GE: return ARM_PL;
17847 case LT: return ARM_MI;
17848 default: return ARM_NV;
17849 }
17850
17851 case CC_Zmode:
17852 switch (comp_code)
17853 {
17854 case NE: return ARM_NE;
17855 case EQ: return ARM_EQ;
17856 default: return ARM_NV;
17857 }
17858
17859 case CC_Nmode:
17860 switch (comp_code)
17861 {
17862 case NE: return ARM_MI;
17863 case EQ: return ARM_PL;
17864 default: return ARM_NV;
17865 }
17866
17867 case CCFPEmode:
17868 case CCFPmode:
17869 /* We can handle all cases except UNEQ and LTGT. */
17870 switch (comp_code)
17871 {
17872 case GE: return ARM_GE;
17873 case GT: return ARM_GT;
17874 case LE: return ARM_LS;
17875 case LT: return ARM_MI;
17876 case NE: return ARM_NE;
17877 case EQ: return ARM_EQ;
17878 case ORDERED: return ARM_VC;
17879 case UNORDERED: return ARM_VS;
17880 case UNLT: return ARM_LT;
17881 case UNLE: return ARM_LE;
17882 case UNGT: return ARM_HI;
17883 case UNGE: return ARM_PL;
17884 /* UNEQ and LTGT do not have a representation. */
17885 case UNEQ: /* Fall through. */
17886 case LTGT: /* Fall through. */
17887 default: return ARM_NV;
17888 }
17889
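    /* CC_SWPmode indicates that the comparison operands were swapped
       when the flags were set, so each signed and unsigned condition
       maps to its operand-exchanged counterpart below.  */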
17890 case CC_SWPmode:
17891 switch (comp_code)
17892 {
17893 case NE: return ARM_NE;
17894 case EQ: return ARM_EQ;
17895 case GE: return ARM_LE;
17896 case GT: return ARM_LT;
17897 case LE: return ARM_GE;
17898 case LT: return ARM_GT;
17899 case GEU: return ARM_LS;
17900 case GTU: return ARM_CC;
17901 case LEU: return ARM_CS;
17902 case LTU: return ARM_HI;
17903 default: return ARM_NV;
17904 }
17905
17906 case CC_Cmode:
17907 switch (comp_code)
17908 {
17909 case LTU: return ARM_CS;
17910 case GEU: return ARM_CC;
17911 default: return ARM_NV;
17912 }
17913
17914 case CC_CZmode:
17915 switch (comp_code)
17916 {
17917 case NE: return ARM_NE;
17918 case EQ: return ARM_EQ;
17919 case GEU: return ARM_CS;
17920 case GTU: return ARM_HI;
17921 case LEU: return ARM_LS;
17922 case LTU: return ARM_CC;
17923 default: return ARM_NV;
17924 }
17925
17926 case CC_NCVmode:
17927 switch (comp_code)
17928 {
17929 case GE: return ARM_GE;
17930 case LT: return ARM_LT;
17931 case GEU: return ARM_CS;
17932 case LTU: return ARM_CC;
17933 default: return ARM_NV;
17934 }
17935
17936 case CCmode:
17937 switch (comp_code)
17938 {
17939 case NE: return ARM_NE;
17940 case EQ: return ARM_EQ;
17941 case GE: return ARM_GE;
17942 case GT: return ARM_GT;
17943 case LE: return ARM_LE;
17944 case LT: return ARM_LT;
17945 case GEU: return ARM_CS;
17946 case GTU: return ARM_HI;
17947 case LEU: return ARM_LS;
17948 case LTU: return ARM_CC;
17949 default: return ARM_NV;
17950 }
17951
17952 default: gcc_unreachable ();
17953 }
17954 }
17955
17956 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
17957 static enum arm_cond_code
17958 get_arm_condition_code (rtx comparison)
17959 {
17960 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
17961 gcc_assert (code != ARM_NV);
17962 return code;
17963 }
17964
17965 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
17966 instructions. */
17967 void
17968 thumb2_final_prescan_insn (rtx insn)
17969 {
17970 rtx first_insn = insn;
17971 rtx body = PATTERN (insn);
17972 rtx predicate;
17973 enum arm_cond_code code;
17974 int n;
17975 int mask;
17976
17977 /* Remove the previous insn from the count of insns to be output. */
17978 if (arm_condexec_count)
17979 arm_condexec_count--;
17980
17981 /* Nothing to do if we are already inside a conditional block. */
17982 if (arm_condexec_count)
17983 return;
17984
17985 if (GET_CODE (body) != COND_EXEC)
17986 return;
17987
17988 /* Conditional jumps are implemented directly. */
17989 if (JUMP_P (insn))
17990 return;
17991
17992 predicate = COND_EXEC_TEST (body);
17993 arm_current_cc = get_arm_condition_code (predicate);
17994
17995 n = get_attr_ce_count (insn);
17996 arm_condexec_count = 1;
17997 arm_condexec_mask = (1 << n) - 1;
17998 arm_condexec_masklen = n;
17999 /* See if subsequent instructions can be combined into the same block. */
18000 for (;;)
18001 {
18002 insn = next_nonnote_insn (insn);
18003
18004 /* Jumping into the middle of an IT block is illegal, so a label or
18005 barrier terminates the block. */
18006 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
18007 break;
18008
18009 body = PATTERN (insn);
18010 /* USE and CLOBBER aren't really insns, so just skip them. */
18011 if (GET_CODE (body) == USE
18012 || GET_CODE (body) == CLOBBER)
18013 continue;
18014
18015 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
18016 if (GET_CODE (body) != COND_EXEC)
18017 break;
18018 /* Allow up to 4 conditionally executed instructions in a block. */
18019 n = get_attr_ce_count (insn);
18020 if (arm_condexec_masklen + n > 4)
18021 break;
18022
18023 predicate = COND_EXEC_TEST (body);
18024 code = get_arm_condition_code (predicate);
18025 mask = (1 << n) - 1;
18026 if (arm_current_cc == code)
18027 arm_condexec_mask |= (mask << arm_condexec_masklen);
18028 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
18029 break;
18030
18031 arm_condexec_count++;
18032 arm_condexec_masklen += n;
18033
18034 /* A jump must be the last instruction in a conditional block. */
18035 if (JUMP_P (insn))
18036 break;
18037 }
18038 /* Restore recog_data (getting the attributes of other insns can
18039 destroy this array, but final.c assumes that it remains intact
18040 across this call). */
18041 extract_constrain_insn_cached (first_insn);
18042 }
18043
18044 void
18045 arm_final_prescan_insn (rtx insn)
18046 {
18047 /* BODY will hold the body of INSN. */
18048 rtx body = PATTERN (insn);
18049
18050 /* This will be 1 if trying to repeat the trick, and things need to be
18051 reversed if it appears to fail. */
18052 int reverse = 0;
18053
18054 /* If we start with a return insn, we only succeed if we find another one. */
18055 int seeking_return = 0;
18056 enum rtx_code return_code = UNKNOWN;
18057
18058 /* START_INSN will hold the insn from where we start looking. This is the
18059 first insn after the following code_label if REVERSE is true. */
18060 rtx start_insn = insn;
18061
18062 /* If in state 4, check if the target branch is reached, in order to
18063 change back to state 0. */
18064 if (arm_ccfsm_state == 4)
18065 {
18066 if (insn == arm_target_insn)
18067 {
18068 arm_target_insn = NULL;
18069 arm_ccfsm_state = 0;
18070 }
18071 return;
18072 }
18073
18074 /* If in state 3, it is possible to repeat the trick, if this insn is an
18075 unconditional branch to a label, and immediately following this branch
18076 is the previous target label which is only used once, and the label this
18077 branch jumps to is not too far off. */
18078 if (arm_ccfsm_state == 3)
18079 {
18080 if (simplejump_p (insn))
18081 {
18082 start_insn = next_nonnote_insn (start_insn);
18083 if (BARRIER_P (start_insn))
18084 {
18085 /* XXX Isn't this always a barrier? */
18086 start_insn = next_nonnote_insn (start_insn);
18087 }
18088 if (LABEL_P (start_insn)
18089 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18090 && LABEL_NUSES (start_insn) == 1)
18091 reverse = TRUE;
18092 else
18093 return;
18094 }
18095 else if (ANY_RETURN_P (body))
18096 {
18097 start_insn = next_nonnote_insn (start_insn);
18098 if (BARRIER_P (start_insn))
18099 start_insn = next_nonnote_insn (start_insn);
18100 if (LABEL_P (start_insn)
18101 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18102 && LABEL_NUSES (start_insn) == 1)
18103 {
18104 reverse = TRUE;
18105 seeking_return = 1;
18106 return_code = GET_CODE (body);
18107 }
18108 else
18109 return;
18110 }
18111 else
18112 return;
18113 }
18114
18115 gcc_assert (!arm_ccfsm_state || reverse);
18116 if (!JUMP_P (insn))
18117 return;
18118
18119 /* This jump might be paralleled with a clobber of the condition codes;
18120 the jump should always come first. */
18121 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
18122 body = XVECEXP (body, 0, 0);
18123
18124 if (reverse
18125 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
18126 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
18127 {
18128 int insns_skipped;
18129 int fail = FALSE, succeed = FALSE;
18130 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
18131 int then_not_else = TRUE;
18132 rtx this_insn = start_insn, label = 0;
18133
18134 /* Register the insn jumped to. */
18135 if (reverse)
18136 {
18137 if (!seeking_return)
18138 label = XEXP (SET_SRC (body), 0);
18139 }
18140 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
18141 label = XEXP (XEXP (SET_SRC (body), 1), 0);
18142 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
18143 {
18144 label = XEXP (XEXP (SET_SRC (body), 2), 0);
18145 then_not_else = FALSE;
18146 }
18147 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
18148 {
18149 seeking_return = 1;
18150 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
18151 }
18152 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
18153 {
18154 seeking_return = 1;
18155 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
18156 then_not_else = FALSE;
18157 }
18158 else
18159 gcc_unreachable ();
18160
18161 /* See how many insns this branch skips, and what kind of insns. If all
18162 insns are okay, and the label or unconditional branch to the same
18163 label is not too far away, succeed. */
18164 for (insns_skipped = 0;
18165 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
18166 {
18167 rtx scanbody;
18168
18169 this_insn = next_nonnote_insn (this_insn);
18170 if (!this_insn)
18171 break;
18172
18173 switch (GET_CODE (this_insn))
18174 {
18175 case CODE_LABEL:
18176 /* Succeed if it is the target label, otherwise fail since
18177 control falls in from somewhere else. */
18178 if (this_insn == label)
18179 {
18180 arm_ccfsm_state = 1;
18181 succeed = TRUE;
18182 }
18183 else
18184 fail = TRUE;
18185 break;
18186
18187 case BARRIER:
18188 /* Succeed if the following insn is the target label.
18189 Otherwise fail.
18190 If return insns are used then the last insn in a function
18191 will be a barrier. */
18192 this_insn = next_nonnote_insn (this_insn);
18193 if (this_insn && this_insn == label)
18194 {
18195 arm_ccfsm_state = 1;
18196 succeed = TRUE;
18197 }
18198 else
18199 fail = TRUE;
18200 break;
18201
18202 case CALL_INSN:
18203 /* The AAPCS says that conditional calls should not be
18204 used since they make interworking inefficient (the
18205 linker can't transform BL<cond> into BLX). That's
18206 only a problem if the machine has BLX. */
18207 if (arm_arch5)
18208 {
18209 fail = TRUE;
18210 break;
18211 }
18212
18213 /* Succeed if the following insn is the target label, or
18214 if the following two insns are a barrier and the
18215 target label. */
18216 this_insn = next_nonnote_insn (this_insn);
18217 if (this_insn && BARRIER_P (this_insn))
18218 this_insn = next_nonnote_insn (this_insn);
18219
18220 if (this_insn && this_insn == label
18221 && insns_skipped < max_insns_skipped)
18222 {
18223 arm_ccfsm_state = 1;
18224 succeed = TRUE;
18225 }
18226 else
18227 fail = TRUE;
18228 break;
18229
18230 case JUMP_INSN:
18231 /* If this is an unconditional branch to the same label, succeed.
18232 If it is to another label, do nothing. If it is conditional,
18233 fail. */
18234 /* XXX Probably, the tests for SET and the PC are
18235 unnecessary. */
18236
18237 scanbody = PATTERN (this_insn);
18238 if (GET_CODE (scanbody) == SET
18239 && GET_CODE (SET_DEST (scanbody)) == PC)
18240 {
18241 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
18242 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
18243 {
18244 arm_ccfsm_state = 2;
18245 succeed = TRUE;
18246 }
18247 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
18248 fail = TRUE;
18249 }
18250 /* Fail if a conditional return is undesirable (e.g. on a
18251 StrongARM), but still allow this if optimizing for size. */
18252 else if (GET_CODE (scanbody) == return_code
18253 && !use_return_insn (TRUE, NULL)
18254 && !optimize_size)
18255 fail = TRUE;
18256 else if (GET_CODE (scanbody) == return_code)
18257 {
18258 arm_ccfsm_state = 2;
18259 succeed = TRUE;
18260 }
18261 else if (GET_CODE (scanbody) == PARALLEL)
18262 {
18263 switch (get_attr_conds (this_insn))
18264 {
18265 case CONDS_NOCOND:
18266 break;
18267 default:
18268 fail = TRUE;
18269 break;
18270 }
18271 }
18272 else
18273 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
18274
18275 break;
18276
18277 case INSN:
18278 /* Instructions using or affecting the condition codes make it
18279 fail. */
18280 scanbody = PATTERN (this_insn);
18281 if (!(GET_CODE (scanbody) == SET
18282 || GET_CODE (scanbody) == PARALLEL)
18283 || get_attr_conds (this_insn) != CONDS_NOCOND)
18284 fail = TRUE;
18285 break;
18286
18287 default:
18288 break;
18289 }
18290 }
18291 if (succeed)
18292 {
18293 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
18294 arm_target_label = CODE_LABEL_NUMBER (label);
18295 else
18296 {
18297 gcc_assert (seeking_return || arm_ccfsm_state == 2);
18298
18299 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
18300 {
18301 this_insn = next_nonnote_insn (this_insn);
18302 gcc_assert (!this_insn
18303 || (!BARRIER_P (this_insn)
18304 && !LABEL_P (this_insn)));
18305 }
18306 if (!this_insn)
18307 {
18308 /* Oh, dear! We ran off the end... give up. */
18309 extract_constrain_insn_cached (insn);
18310 arm_ccfsm_state = 0;
18311 arm_target_insn = NULL;
18312 return;
18313 }
18314 arm_target_insn = this_insn;
18315 }
18316
18317 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
18318 what it was. */
18319 if (!reverse)
18320 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
18321
18322 if (reverse || then_not_else)
18323 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
18324 }
18325
18326 /* Restore recog_data (getting the attributes of other insns can
18327 destroy this array, but final.c assumes that it remains intact
18328 across this call). */
18329 extract_constrain_insn_cached (insn);
18330 }
18331 }
18332
18333 /* Output IT instructions. */
18334 void
18335 thumb2_asm_output_opcode (FILE * stream)
18336 {
18337 char buff[5];
18338 int n;
18339
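  /* Bit N of arm_condexec_mask is set when insn N of the block uses
     arm_current_cc and clear when it uses the inverse condition; bit 0
     is always set.  For example, a three-insn block with mask 0b101 and
     condition EQ prints as "itet eq".  */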
18340 if (arm_condexec_mask)
18341 {
18342 for (n = 0; n < arm_condexec_masklen; n++)
18343 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
18344 buff[n] = 0;
18345 asm_fprintf(stream, "i%s\t%s\n\t", buff,
18346 arm_condition_codes[arm_current_cc]);
18347 arm_condexec_mask = 0;
18348 }
18349 }
18350
18351 /* Returns true if REGNO is a valid register
18352 for holding a quantity of type MODE. */
18353 int
18354 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
18355 {
18356 if (GET_MODE_CLASS (mode) == MODE_CC)
18357 return (regno == CC_REGNUM
18358 || (TARGET_HARD_FLOAT && TARGET_VFP
18359 && regno == VFPCC_REGNUM));
18360
18361 if (TARGET_THUMB1)
18362 /* For the Thumb we only allow values bigger than SImode in
18363 registers 0 - 6, so that there is always a second low
18364 register available to hold the upper part of the value.
18365 We probably ought to ensure that the register is the
18366 start of an even-numbered register pair. */
18367 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
18368
18369 if (TARGET_HARD_FLOAT && TARGET_VFP
18370 && IS_VFP_REGNUM (regno))
18371 {
18372 if (mode == SFmode || mode == SImode)
18373 return VFP_REGNO_OK_FOR_SINGLE (regno);
18374
18375 if (mode == DFmode)
18376 return VFP_REGNO_OK_FOR_DOUBLE (regno);
18377
18378 /* VFP registers can hold HFmode values, but there is no point in
18379 putting them there unless we have hardware conversion insns. */
18380 if (mode == HFmode)
18381 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
18382
18383 if (TARGET_NEON)
18384 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
18385 || (VALID_NEON_QREG_MODE (mode)
18386 && NEON_REGNO_OK_FOR_QUAD (regno))
18387 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
18388 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
18389 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
18390 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
18391 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
18392
18393 return FALSE;
18394 }
18395
18396 if (TARGET_REALLY_IWMMXT)
18397 {
18398 if (IS_IWMMXT_GR_REGNUM (regno))
18399 return mode == SImode;
18400
18401 if (IS_IWMMXT_REGNUM (regno))
18402 return VALID_IWMMXT_REG_MODE (mode);
18403 }
18404
18405 /* We allow almost any value to be stored in the general registers.
18406 Restrict doubleword quantities to even register pairs so that we can
18407 use ldrd. Do not allow very large Neon structure opaque modes in
18408 general registers; they would use too many. */
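  /* For example, when LDRD is available a DImode value may live in
     {r0, r1} or {r2, r3} but not in a pair starting at an odd register
     such as {r1, r2}.  */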
18409 if (regno <= LAST_ARM_REGNUM)
18410 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
18411 && ARM_NUM_REGS (mode) <= 4;
18412
18413 if (regno == FRAME_POINTER_REGNUM
18414 || regno == ARG_POINTER_REGNUM)
18415 /* We only allow integers in the fake hard registers. */
18416 return GET_MODE_CLASS (mode) == MODE_INT;
18417
18418 return FALSE;
18419 }
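/* A worked example of the core-register rule above (illustrative only):
   when TARGET_LDRD is set, a DImode value (two words) is accepted starting
   in an even register such as r0 or r2 but rejected starting in an odd one
   such as r1, keeping doublewords in the even/odd pairs that ldrd/strd need.
   An OImode Neon structure (eight words) is rejected for any core register
   because ARM_NUM_REGS (OImode) exceeds 4.  */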
18420
18421 /* Implement MODES_TIEABLE_P. */
18422
18423 bool
18424 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18425 {
18426 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
18427 return true;
18428
18429 /* We specifically want to allow elements of "structure" modes to
18430 be tieable to the structure. This more general condition allows
18431 other rarer situations too. */
18432 if (TARGET_NEON
18433 && (VALID_NEON_DREG_MODE (mode1)
18434 || VALID_NEON_QREG_MODE (mode1)
18435 || VALID_NEON_STRUCT_MODE (mode1))
18436 && (VALID_NEON_DREG_MODE (mode2)
18437 || VALID_NEON_QREG_MODE (mode2)
18438 || VALID_NEON_STRUCT_MODE (mode2)))
18439 return true;
18440
18441 return false;
18442 }
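/* Illustrative example (assuming a NEON-enabled target): V8QImode and OImode
   are in different mode classes, so the first test fails, but V8QImode is a
   valid D-register mode and OImode a valid structure mode, so the second
   test lets a V8QI element be tied to the OImode "structure of vectors"
   containing it.  */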
18443
18444 /* For efficiency and historical reasons, LO_REGS, HI_REGS and CC_REGS are
18445 not used in ARM mode. */
18446
18447 enum reg_class
18448 arm_regno_class (int regno)
18449 {
18450 if (TARGET_THUMB1)
18451 {
18452 if (regno == STACK_POINTER_REGNUM)
18453 return STACK_REG;
18454 if (regno == CC_REGNUM)
18455 return CC_REG;
18456 if (regno < 8)
18457 return LO_REGS;
18458 return HI_REGS;
18459 }
18460
18461 if (TARGET_THUMB2 && regno < 8)
18462 return LO_REGS;
18463
18464 if ( regno <= LAST_ARM_REGNUM
18465 || regno == FRAME_POINTER_REGNUM
18466 || regno == ARG_POINTER_REGNUM)
18467 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
18468
18469 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
18470 return TARGET_THUMB2 ? CC_REG : NO_REGS;
18471
18472 if (IS_VFP_REGNUM (regno))
18473 {
18474 if (regno <= D7_VFP_REGNUM)
18475 return VFP_D0_D7_REGS;
18476 else if (regno <= LAST_LO_VFP_REGNUM)
18477 return VFP_LO_REGS;
18478 else
18479 return VFP_HI_REGS;
18480 }
18481
18482 if (IS_IWMMXT_REGNUM (regno))
18483 return IWMMXT_REGS;
18484
18485 if (IS_IWMMXT_GR_REGNUM (regno))
18486 return IWMMXT_GR_REGS;
18487
18488 return NO_REGS;
18489 }
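/* Illustrative mapping (not exhaustive): in Thumb-1, sp maps to STACK_REG,
   r0-r7 to LO_REGS and the remaining core registers to HI_REGS; in Thumb-2,
   r0-r7 map to LO_REGS and the rest (including the fake frame and argument
   pointers) to HI_REGS; in ARM state all of these are simply GENERAL_REGS.  */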
18490
18491 /* Handle a special case when computing the offset
18492 of an argument from the frame pointer. */
18493 int
18494 arm_debugger_arg_offset (int value, rtx addr)
18495 {
18496 rtx insn;
18497
18498 /* We are only interested if dbxout_parms() failed to compute the offset. */
18499 if (value != 0)
18500 return 0;
18501
18502 /* We can only cope with the case where the address is held in a register. */
18503 if (!REG_P (addr))
18504 return 0;
18505
18506 /* If we are using the frame pointer to point at the argument, then
18507 an offset of 0 is correct. */
18508 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
18509 return 0;
18510
18511 /* If we are using the stack pointer to point at the
18512 argument, then an offset of 0 is correct. */
18513 /* ??? Check this is consistent with thumb2 frame layout. */
18514 if ((TARGET_THUMB || !frame_pointer_needed)
18515 && REGNO (addr) == SP_REGNUM)
18516 return 0;
18517
18518 /* Oh dear. The argument is pointed to by a register rather
18519 than being held in a register, or being stored at a known
18520 offset from the frame pointer. Since GDB only understands
18521 those two kinds of argument we must translate the address
18522 held in the register into an offset from the frame pointer.
18523 We do this by searching through the insns for the function
18524 looking to see where this register gets its value. If the
18525 register is initialized from the frame pointer plus an offset
18526 then we are in luck and we can continue, otherwise we give up.
18527
18528 This code is exercised by producing debugging information
18529 for a function with arguments like this:
18530
18531 double func (double a, double b, int c, double d) {return d;}
18532
18533 Without this code the stab for parameter 'd' will be set to
18534 an offset of 0 from the frame pointer, rather than 8. */
18535
18536 /* The if() statement says:
18537
18538 If the insn is a normal instruction
18539 and if the insn is setting the value in a register
18540 and if the register being set is the register holding the address of the argument
18541 and if the address is computed by an addition
18542 that involves adding to a register
18543 which is the frame pointer
18544 a constant integer
18545
18546 then... */
18547
18548 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18549 {
18550 if ( NONJUMP_INSN_P (insn)
18551 && GET_CODE (PATTERN (insn)) == SET
18552 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
18553 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
18554 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
18555 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
18556 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
18557 )
18558 {
18559 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
18560
18561 break;
18562 }
18563 }
18564
18565 if (value == 0)
18566 {
18567 debug_rtx (addr);
18568 warning (0, "unable to compute real location of stacked parameter");
18569 value = 8; /* XXX magic hack */
18570 }
18571
18572 return value;
18573 }
18574 \f
18575 typedef enum {
18576 T_V8QI,
18577 T_V4HI,
18578 T_V2SI,
18579 T_V2SF,
18580 T_DI,
18581 T_V16QI,
18582 T_V8HI,
18583 T_V4SI,
18584 T_V4SF,
18585 T_V2DI,
18586 T_TI,
18587 T_EI,
18588 T_OI,
18589 T_MAX /* Size of enum. Keep last. */
18590 } neon_builtin_type_mode;
18591
18592 #define TYPE_MODE_BIT(X) (1 << (X))
18593
18594 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
18595 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
18596 | TYPE_MODE_BIT (T_DI))
18597 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
18598 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
18599 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
18600
18601 #define v8qi_UP T_V8QI
18602 #define v4hi_UP T_V4HI
18603 #define v2si_UP T_V2SI
18604 #define v2sf_UP T_V2SF
18605 #define di_UP T_DI
18606 #define v16qi_UP T_V16QI
18607 #define v8hi_UP T_V8HI
18608 #define v4si_UP T_V4SI
18609 #define v4sf_UP T_V4SF
18610 #define v2di_UP T_V2DI
18611 #define ti_UP T_TI
18612 #define ei_UP T_EI
18613 #define oi_UP T_OI
18614
18615 #define UP(X) X##_UP
18616
18617 typedef enum {
18618 NEON_BINOP,
18619 NEON_TERNOP,
18620 NEON_UNOP,
18621 NEON_GETLANE,
18622 NEON_SETLANE,
18623 NEON_CREATE,
18624 NEON_DUP,
18625 NEON_DUPLANE,
18626 NEON_COMBINE,
18627 NEON_SPLIT,
18628 NEON_LANEMUL,
18629 NEON_LANEMULL,
18630 NEON_LANEMULH,
18631 NEON_LANEMAC,
18632 NEON_SCALARMUL,
18633 NEON_SCALARMULL,
18634 NEON_SCALARMULH,
18635 NEON_SCALARMAC,
18636 NEON_CONVERT,
18637 NEON_FIXCONV,
18638 NEON_SELECT,
18639 NEON_RESULTPAIR,
18640 NEON_REINTERP,
18641 NEON_VTBL,
18642 NEON_VTBX,
18643 NEON_LOAD1,
18644 NEON_LOAD1LANE,
18645 NEON_STORE1,
18646 NEON_STORE1LANE,
18647 NEON_LOADSTRUCT,
18648 NEON_LOADSTRUCTLANE,
18649 NEON_STORESTRUCT,
18650 NEON_STORESTRUCTLANE,
18651 NEON_LOGICBINOP,
18652 NEON_SHIFTINSERT,
18653 NEON_SHIFTIMM,
18654 NEON_SHIFTACC
18655 } neon_itype;
18656
18657 typedef struct {
18658 const char *name;
18659 const neon_itype itype;
18660 const neon_builtin_type_mode mode;
18661 const enum insn_code code;
18662 unsigned int fcode;
18663 } neon_builtin_datum;
18664
18665 #define CF(N,X) CODE_FOR_neon_##N##X
18666
18667 #define VAR1(T, N, A) \
18668 {#N, NEON_##T, UP (A), CF (N, A), 0}
18669 #define VAR2(T, N, A, B) \
18670 VAR1 (T, N, A), \
18671 {#N, NEON_##T, UP (B), CF (N, B), 0}
18672 #define VAR3(T, N, A, B, C) \
18673 VAR2 (T, N, A, B), \
18674 {#N, NEON_##T, UP (C), CF (N, C), 0}
18675 #define VAR4(T, N, A, B, C, D) \
18676 VAR3 (T, N, A, B, C), \
18677 {#N, NEON_##T, UP (D), CF (N, D), 0}
18678 #define VAR5(T, N, A, B, C, D, E) \
18679 VAR4 (T, N, A, B, C, D), \
18680 {#N, NEON_##T, UP (E), CF (N, E), 0}
18681 #define VAR6(T, N, A, B, C, D, E, F) \
18682 VAR5 (T, N, A, B, C, D, E), \
18683 {#N, NEON_##T, UP (F), CF (N, F), 0}
18684 #define VAR7(T, N, A, B, C, D, E, F, G) \
18685 VAR6 (T, N, A, B, C, D, E, F), \
18686 {#N, NEON_##T, UP (G), CF (N, G), 0}
18687 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
18688 VAR7 (T, N, A, B, C, D, E, F, G), \
18689 {#N, NEON_##T, UP (H), CF (N, H), 0}
18690 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
18691 VAR8 (T, N, A, B, C, D, E, F, G, H), \
18692 {#N, NEON_##T, UP (I), CF (N, I), 0}
18693 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
18694 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
18695 {#N, NEON_##T, UP (J), CF (N, J), 0}
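/* For illustration, a single line such as

     VAR3 (BINOP, vaddl, v8qi, v4hi, v2si)

   expands (via VAR1/VAR2, UP and CF) into the three table entries

     {"vaddl", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddlv8qi, 0},
     {"vaddl", NEON_BINOP, T_V4HI, CODE_FOR_neon_vaddlv4hi, 0},
     {"vaddl", NEON_BINOP, T_V2SI, CODE_FOR_neon_vaddlv2si, 0}

   i.e. one neon_builtin_datum per "key" mode; the fcode field is filled in
   later by arm_init_neon_builtins.  */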
18696
18697 /* The mode entries in the following table correspond to the "key" type of the
18698 instruction variant, i.e. equivalent to that which would be specified after
18699 the assembler mnemonic, which usually refers to the last vector operand.
18700 (Signed/unsigned/polynomial types are not distinguished, though; they are
18701 all mapped onto the same mode for a given element size.)  The modes
18702 listed per instruction should be the same as those defined for that
18703 instruction's pattern in neon.md. */
18704
18705 static neon_builtin_datum neon_builtin_data[] =
18706 {
18707 VAR10 (BINOP, vadd,
18708 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18709 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
18710 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
18711 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18712 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18713 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
18714 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18715 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18716 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
18717 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18718 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
18719 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
18720 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
18721 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
18722 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
18723 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
18724 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
18725 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
18726 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
18727 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
18728 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
18729 VAR2 (BINOP, vqdmull, v4hi, v2si),
18730 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18731 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18732 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18733 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
18734 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
18735 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
18736 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18737 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18738 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18739 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
18740 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18741 VAR10 (BINOP, vsub,
18742 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18743 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
18744 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
18745 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18746 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18747 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
18748 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18749 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18750 VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18751 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18752 VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18753 VAR2 (BINOP, vcage, v2sf, v4sf),
18754 VAR2 (BINOP, vcagt, v2sf, v4sf),
18755 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18756 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18757 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
18758 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18759 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
18760 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18761 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18762 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
18763 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18764 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18765 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
18766 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
18767 VAR2 (BINOP, vrecps, v2sf, v4sf),
18768 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
18769 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18770 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18771 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18772 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18773 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18774 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18775 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18776 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18777 VAR2 (UNOP, vcnt, v8qi, v16qi),
18778 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
18779 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
18780 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18781 /* FIXME: vget_lane supports more variants than this! */
18782 VAR10 (GETLANE, vget_lane,
18783 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18784 VAR10 (SETLANE, vset_lane,
18785 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18786 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
18787 VAR10 (DUP, vdup_n,
18788 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18789 VAR10 (DUPLANE, vdup_lane,
18790 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18791 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
18792 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
18793 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
18794 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
18795 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
18796 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
18797 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
18798 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18799 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18800 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
18801 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
18802 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18803 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
18804 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
18805 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18806 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18807 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
18808 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
18809 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18810 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
18811 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
18812 VAR10 (BINOP, vext,
18813 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18814 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18815 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
18816 VAR2 (UNOP, vrev16, v8qi, v16qi),
18817 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
18818 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
18819 VAR10 (SELECT, vbsl,
18820 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18821 VAR1 (VTBL, vtbl1, v8qi),
18822 VAR1 (VTBL, vtbl2, v8qi),
18823 VAR1 (VTBL, vtbl3, v8qi),
18824 VAR1 (VTBL, vtbl4, v8qi),
18825 VAR1 (VTBX, vtbx1, v8qi),
18826 VAR1 (VTBX, vtbx2, v8qi),
18827 VAR1 (VTBX, vtbx3, v8qi),
18828 VAR1 (VTBX, vtbx4, v8qi),
18829 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18830 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18831 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18832 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
18833 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
18834 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
18835 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
18836 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
18837 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
18838 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
18839 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
18840 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
18841 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
18842 VAR10 (LOAD1, vld1,
18843 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18844 VAR10 (LOAD1LANE, vld1_lane,
18845 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18846 VAR10 (LOAD1, vld1_dup,
18847 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18848 VAR10 (STORE1, vst1,
18849 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18850 VAR10 (STORE1LANE, vst1_lane,
18851 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18852 VAR9 (LOADSTRUCT,
18853 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18854 VAR7 (LOADSTRUCTLANE, vld2_lane,
18855 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18856 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
18857 VAR9 (STORESTRUCT, vst2,
18858 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18859 VAR7 (STORESTRUCTLANE, vst2_lane,
18860 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18861 VAR9 (LOADSTRUCT,
18862 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18863 VAR7 (LOADSTRUCTLANE, vld3_lane,
18864 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18865 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
18866 VAR9 (STORESTRUCT, vst3,
18867 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18868 VAR7 (STORESTRUCTLANE, vst3_lane,
18869 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18870 VAR9 (LOADSTRUCT, vld4,
18871 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18872 VAR7 (LOADSTRUCTLANE, vld4_lane,
18873 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18874 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
18875 VAR9 (STORESTRUCT, vst4,
18876 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18877 VAR7 (STORESTRUCTLANE, vst4_lane,
18878 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18879 VAR10 (LOGICBINOP, vand,
18880 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18881 VAR10 (LOGICBINOP, vorr,
18882 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18883 VAR10 (BINOP, veor,
18884 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18885 VAR10 (LOGICBINOP, vbic,
18886 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18887 VAR10 (LOGICBINOP, vorn,
18888 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
18889 };
18890
18891 #undef CF
18892 #undef VAR1
18893 #undef VAR2
18894 #undef VAR3
18895 #undef VAR4
18896 #undef VAR5
18897 #undef VAR6
18898 #undef VAR7
18899 #undef VAR8
18900 #undef VAR9
18901 #undef VAR10
18902
18903 /* Neon defines builtins from ARM_BUILTIN_NEON_BASE upwards, though they don't have
18904 symbolic names defined here (which would require too much duplication).
18905 FIXME? */
18906 enum arm_builtins
18907 {
18908 ARM_BUILTIN_GETWCGR0,
18909 ARM_BUILTIN_GETWCGR1,
18910 ARM_BUILTIN_GETWCGR2,
18911 ARM_BUILTIN_GETWCGR3,
18912
18913 ARM_BUILTIN_SETWCGR0,
18914 ARM_BUILTIN_SETWCGR1,
18915 ARM_BUILTIN_SETWCGR2,
18916 ARM_BUILTIN_SETWCGR3,
18917
18918 ARM_BUILTIN_WZERO,
18919
18920 ARM_BUILTIN_WAVG2BR,
18921 ARM_BUILTIN_WAVG2HR,
18922 ARM_BUILTIN_WAVG2B,
18923 ARM_BUILTIN_WAVG2H,
18924
18925 ARM_BUILTIN_WACCB,
18926 ARM_BUILTIN_WACCH,
18927 ARM_BUILTIN_WACCW,
18928
18929 ARM_BUILTIN_WMACS,
18930 ARM_BUILTIN_WMACSZ,
18931 ARM_BUILTIN_WMACU,
18932 ARM_BUILTIN_WMACUZ,
18933
18934 ARM_BUILTIN_WSADB,
18935 ARM_BUILTIN_WSADBZ,
18936 ARM_BUILTIN_WSADH,
18937 ARM_BUILTIN_WSADHZ,
18938
18939 ARM_BUILTIN_WALIGNI,
18940 ARM_BUILTIN_WALIGNR0,
18941 ARM_BUILTIN_WALIGNR1,
18942 ARM_BUILTIN_WALIGNR2,
18943 ARM_BUILTIN_WALIGNR3,
18944
18945 ARM_BUILTIN_TMIA,
18946 ARM_BUILTIN_TMIAPH,
18947 ARM_BUILTIN_TMIABB,
18948 ARM_BUILTIN_TMIABT,
18949 ARM_BUILTIN_TMIATB,
18950 ARM_BUILTIN_TMIATT,
18951
18952 ARM_BUILTIN_TMOVMSKB,
18953 ARM_BUILTIN_TMOVMSKH,
18954 ARM_BUILTIN_TMOVMSKW,
18955
18956 ARM_BUILTIN_TBCSTB,
18957 ARM_BUILTIN_TBCSTH,
18958 ARM_BUILTIN_TBCSTW,
18959
18960 ARM_BUILTIN_WMADDS,
18961 ARM_BUILTIN_WMADDU,
18962
18963 ARM_BUILTIN_WPACKHSS,
18964 ARM_BUILTIN_WPACKWSS,
18965 ARM_BUILTIN_WPACKDSS,
18966 ARM_BUILTIN_WPACKHUS,
18967 ARM_BUILTIN_WPACKWUS,
18968 ARM_BUILTIN_WPACKDUS,
18969
18970 ARM_BUILTIN_WADDB,
18971 ARM_BUILTIN_WADDH,
18972 ARM_BUILTIN_WADDW,
18973 ARM_BUILTIN_WADDSSB,
18974 ARM_BUILTIN_WADDSSH,
18975 ARM_BUILTIN_WADDSSW,
18976 ARM_BUILTIN_WADDUSB,
18977 ARM_BUILTIN_WADDUSH,
18978 ARM_BUILTIN_WADDUSW,
18979 ARM_BUILTIN_WSUBB,
18980 ARM_BUILTIN_WSUBH,
18981 ARM_BUILTIN_WSUBW,
18982 ARM_BUILTIN_WSUBSSB,
18983 ARM_BUILTIN_WSUBSSH,
18984 ARM_BUILTIN_WSUBSSW,
18985 ARM_BUILTIN_WSUBUSB,
18986 ARM_BUILTIN_WSUBUSH,
18987 ARM_BUILTIN_WSUBUSW,
18988
18989 ARM_BUILTIN_WAND,
18990 ARM_BUILTIN_WANDN,
18991 ARM_BUILTIN_WOR,
18992 ARM_BUILTIN_WXOR,
18993
18994 ARM_BUILTIN_WCMPEQB,
18995 ARM_BUILTIN_WCMPEQH,
18996 ARM_BUILTIN_WCMPEQW,
18997 ARM_BUILTIN_WCMPGTUB,
18998 ARM_BUILTIN_WCMPGTUH,
18999 ARM_BUILTIN_WCMPGTUW,
19000 ARM_BUILTIN_WCMPGTSB,
19001 ARM_BUILTIN_WCMPGTSH,
19002 ARM_BUILTIN_WCMPGTSW,
19003
19004 ARM_BUILTIN_TEXTRMSB,
19005 ARM_BUILTIN_TEXTRMSH,
19006 ARM_BUILTIN_TEXTRMSW,
19007 ARM_BUILTIN_TEXTRMUB,
19008 ARM_BUILTIN_TEXTRMUH,
19009 ARM_BUILTIN_TEXTRMUW,
19010 ARM_BUILTIN_TINSRB,
19011 ARM_BUILTIN_TINSRH,
19012 ARM_BUILTIN_TINSRW,
19013
19014 ARM_BUILTIN_WMAXSW,
19015 ARM_BUILTIN_WMAXSH,
19016 ARM_BUILTIN_WMAXSB,
19017 ARM_BUILTIN_WMAXUW,
19018 ARM_BUILTIN_WMAXUH,
19019 ARM_BUILTIN_WMAXUB,
19020 ARM_BUILTIN_WMINSW,
19021 ARM_BUILTIN_WMINSH,
19022 ARM_BUILTIN_WMINSB,
19023 ARM_BUILTIN_WMINUW,
19024 ARM_BUILTIN_WMINUH,
19025 ARM_BUILTIN_WMINUB,
19026
19027 ARM_BUILTIN_WMULUM,
19028 ARM_BUILTIN_WMULSM,
19029 ARM_BUILTIN_WMULUL,
19030
19031 ARM_BUILTIN_PSADBH,
19032 ARM_BUILTIN_WSHUFH,
19033
19034 ARM_BUILTIN_WSLLH,
19035 ARM_BUILTIN_WSLLW,
19036 ARM_BUILTIN_WSLLD,
19037 ARM_BUILTIN_WSRAH,
19038 ARM_BUILTIN_WSRAW,
19039 ARM_BUILTIN_WSRAD,
19040 ARM_BUILTIN_WSRLH,
19041 ARM_BUILTIN_WSRLW,
19042 ARM_BUILTIN_WSRLD,
19043 ARM_BUILTIN_WRORH,
19044 ARM_BUILTIN_WRORW,
19045 ARM_BUILTIN_WRORD,
19046 ARM_BUILTIN_WSLLHI,
19047 ARM_BUILTIN_WSLLWI,
19048 ARM_BUILTIN_WSLLDI,
19049 ARM_BUILTIN_WSRAHI,
19050 ARM_BUILTIN_WSRAWI,
19051 ARM_BUILTIN_WSRADI,
19052 ARM_BUILTIN_WSRLHI,
19053 ARM_BUILTIN_WSRLWI,
19054 ARM_BUILTIN_WSRLDI,
19055 ARM_BUILTIN_WRORHI,
19056 ARM_BUILTIN_WRORWI,
19057 ARM_BUILTIN_WRORDI,
19058
19059 ARM_BUILTIN_WUNPCKIHB,
19060 ARM_BUILTIN_WUNPCKIHH,
19061 ARM_BUILTIN_WUNPCKIHW,
19062 ARM_BUILTIN_WUNPCKILB,
19063 ARM_BUILTIN_WUNPCKILH,
19064 ARM_BUILTIN_WUNPCKILW,
19065
19066 ARM_BUILTIN_WUNPCKEHSB,
19067 ARM_BUILTIN_WUNPCKEHSH,
19068 ARM_BUILTIN_WUNPCKEHSW,
19069 ARM_BUILTIN_WUNPCKEHUB,
19070 ARM_BUILTIN_WUNPCKEHUH,
19071 ARM_BUILTIN_WUNPCKEHUW,
19072 ARM_BUILTIN_WUNPCKELSB,
19073 ARM_BUILTIN_WUNPCKELSH,
19074 ARM_BUILTIN_WUNPCKELSW,
19075 ARM_BUILTIN_WUNPCKELUB,
19076 ARM_BUILTIN_WUNPCKELUH,
19077 ARM_BUILTIN_WUNPCKELUW,
19078
19079 ARM_BUILTIN_WABSB,
19080 ARM_BUILTIN_WABSH,
19081 ARM_BUILTIN_WABSW,
19082
19083 ARM_BUILTIN_WADDSUBHX,
19084 ARM_BUILTIN_WSUBADDHX,
19085
19086 ARM_BUILTIN_WABSDIFFB,
19087 ARM_BUILTIN_WABSDIFFH,
19088 ARM_BUILTIN_WABSDIFFW,
19089
19090 ARM_BUILTIN_WADDCH,
19091 ARM_BUILTIN_WADDCW,
19092
19093 ARM_BUILTIN_WAVG4,
19094 ARM_BUILTIN_WAVG4R,
19095
19096 ARM_BUILTIN_WMADDSX,
19097 ARM_BUILTIN_WMADDUX,
19098
19099 ARM_BUILTIN_WMADDSN,
19100 ARM_BUILTIN_WMADDUN,
19101
19102 ARM_BUILTIN_WMULWSM,
19103 ARM_BUILTIN_WMULWUM,
19104
19105 ARM_BUILTIN_WMULWSMR,
19106 ARM_BUILTIN_WMULWUMR,
19107
19108 ARM_BUILTIN_WMULWL,
19109
19110 ARM_BUILTIN_WMULSMR,
19111 ARM_BUILTIN_WMULUMR,
19112
19113 ARM_BUILTIN_WQMULM,
19114 ARM_BUILTIN_WQMULMR,
19115
19116 ARM_BUILTIN_WQMULWM,
19117 ARM_BUILTIN_WQMULWMR,
19118
19119 ARM_BUILTIN_WADDBHUSM,
19120 ARM_BUILTIN_WADDBHUSL,
19121
19122 ARM_BUILTIN_WQMIABB,
19123 ARM_BUILTIN_WQMIABT,
19124 ARM_BUILTIN_WQMIATB,
19125 ARM_BUILTIN_WQMIATT,
19126
19127 ARM_BUILTIN_WQMIABBN,
19128 ARM_BUILTIN_WQMIABTN,
19129 ARM_BUILTIN_WQMIATBN,
19130 ARM_BUILTIN_WQMIATTN,
19131
19132 ARM_BUILTIN_WMIABB,
19133 ARM_BUILTIN_WMIABT,
19134 ARM_BUILTIN_WMIATB,
19135 ARM_BUILTIN_WMIATT,
19136
19137 ARM_BUILTIN_WMIABBN,
19138 ARM_BUILTIN_WMIABTN,
19139 ARM_BUILTIN_WMIATBN,
19140 ARM_BUILTIN_WMIATTN,
19141
19142 ARM_BUILTIN_WMIAWBB,
19143 ARM_BUILTIN_WMIAWBT,
19144 ARM_BUILTIN_WMIAWTB,
19145 ARM_BUILTIN_WMIAWTT,
19146
19147 ARM_BUILTIN_WMIAWBBN,
19148 ARM_BUILTIN_WMIAWBTN,
19149 ARM_BUILTIN_WMIAWTBN,
19150 ARM_BUILTIN_WMIAWTTN,
19151
19152 ARM_BUILTIN_WMERGE,
19153
19154 ARM_BUILTIN_THREAD_POINTER,
19155
19156 ARM_BUILTIN_NEON_BASE,
19157
19158 ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
19159 };
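/* Derived from the enum above: the iWMMXt and miscellaneous builtins occupy
   function codes [0, ARM_BUILTIN_NEON_BASE), and the Neon builtin described
   by neon_builtin_data[i] gets code ARM_BUILTIN_NEON_BASE + i, so
   arm_builtin_decls[ARM_BUILTIN_NEON_BASE + i] holds its declaration once
   arm_init_neon_builtins has run.  */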
19160
19161 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
19162
19163 static void
19164 arm_init_neon_builtins (void)
19165 {
19166 unsigned int i, fcode;
19167 tree decl;
19168
19169 tree neon_intQI_type_node;
19170 tree neon_intHI_type_node;
19171 tree neon_polyQI_type_node;
19172 tree neon_polyHI_type_node;
19173 tree neon_intSI_type_node;
19174 tree neon_intDI_type_node;
19175 tree neon_float_type_node;
19176
19177 tree intQI_pointer_node;
19178 tree intHI_pointer_node;
19179 tree intSI_pointer_node;
19180 tree intDI_pointer_node;
19181 tree float_pointer_node;
19182
19183 tree const_intQI_node;
19184 tree const_intHI_node;
19185 tree const_intSI_node;
19186 tree const_intDI_node;
19187 tree const_float_node;
19188
19189 tree const_intQI_pointer_node;
19190 tree const_intHI_pointer_node;
19191 tree const_intSI_pointer_node;
19192 tree const_intDI_pointer_node;
19193 tree const_float_pointer_node;
19194
19195 tree V8QI_type_node;
19196 tree V4HI_type_node;
19197 tree V2SI_type_node;
19198 tree V2SF_type_node;
19199 tree V16QI_type_node;
19200 tree V8HI_type_node;
19201 tree V4SI_type_node;
19202 tree V4SF_type_node;
19203 tree V2DI_type_node;
19204
19205 tree intUQI_type_node;
19206 tree intUHI_type_node;
19207 tree intUSI_type_node;
19208 tree intUDI_type_node;
19209
19210 tree intEI_type_node;
19211 tree intOI_type_node;
19212 tree intCI_type_node;
19213 tree intXI_type_node;
19214
19215 tree V8QI_pointer_node;
19216 tree V4HI_pointer_node;
19217 tree V2SI_pointer_node;
19218 tree V2SF_pointer_node;
19219 tree V16QI_pointer_node;
19220 tree V8HI_pointer_node;
19221 tree V4SI_pointer_node;
19222 tree V4SF_pointer_node;
19223 tree V2DI_pointer_node;
19224
19225 tree void_ftype_pv8qi_v8qi_v8qi;
19226 tree void_ftype_pv4hi_v4hi_v4hi;
19227 tree void_ftype_pv2si_v2si_v2si;
19228 tree void_ftype_pv2sf_v2sf_v2sf;
19229 tree void_ftype_pdi_di_di;
19230 tree void_ftype_pv16qi_v16qi_v16qi;
19231 tree void_ftype_pv8hi_v8hi_v8hi;
19232 tree void_ftype_pv4si_v4si_v4si;
19233 tree void_ftype_pv4sf_v4sf_v4sf;
19234 tree void_ftype_pv2di_v2di_v2di;
19235
19236 tree reinterp_ftype_dreg[5][5];
19237 tree reinterp_ftype_qreg[5][5];
19238 tree dreg_types[5], qreg_types[5];
19239
19240 /* Create distinguished type nodes for NEON vector element types,
19241 and pointers to values of such types, so we can detect them later. */
19242 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19243 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19244 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19245 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19246 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
19247 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
19248 neon_float_type_node = make_node (REAL_TYPE);
19249 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
19250 layout_type (neon_float_type_node);
19251
19252 /* Define typedefs which exactly correspond to the modes we are basing vector
19253 types on. If you change these names you'll need to change
19254 the table used by arm_mangle_type too. */
19255 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
19256 "__builtin_neon_qi");
19257 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
19258 "__builtin_neon_hi");
19259 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
19260 "__builtin_neon_si");
19261 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
19262 "__builtin_neon_sf");
19263 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
19264 "__builtin_neon_di");
19265 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
19266 "__builtin_neon_poly8");
19267 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
19268 "__builtin_neon_poly16");
19269
19270 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
19271 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
19272 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
19273 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
19274 float_pointer_node = build_pointer_type (neon_float_type_node);
19275
19276 /* Next create constant-qualified versions of the above types. */
19277 const_intQI_node = build_qualified_type (neon_intQI_type_node,
19278 TYPE_QUAL_CONST);
19279 const_intHI_node = build_qualified_type (neon_intHI_type_node,
19280 TYPE_QUAL_CONST);
19281 const_intSI_node = build_qualified_type (neon_intSI_type_node,
19282 TYPE_QUAL_CONST);
19283 const_intDI_node = build_qualified_type (neon_intDI_type_node,
19284 TYPE_QUAL_CONST);
19285 const_float_node = build_qualified_type (neon_float_type_node,
19286 TYPE_QUAL_CONST);
19287
19288 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
19289 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
19290 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
19291 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
19292 const_float_pointer_node = build_pointer_type (const_float_node);
19293
19294 /* Now create vector types based on our NEON element types. */
19295 /* 64-bit vectors. */
19296 V8QI_type_node =
19297 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
19298 V4HI_type_node =
19299 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
19300 V2SI_type_node =
19301 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
19302 V2SF_type_node =
19303 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
19304 /* 128-bit vectors. */
19305 V16QI_type_node =
19306 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
19307 V8HI_type_node =
19308 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
19309 V4SI_type_node =
19310 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
19311 V4SF_type_node =
19312 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
19313 V2DI_type_node =
19314 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
19315
19316 /* Unsigned integer types for various mode sizes. */
19317 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
19318 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
19319 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
19320 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
19321
19322 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
19323 "__builtin_neon_uqi");
19324 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
19325 "__builtin_neon_uhi");
19326 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
19327 "__builtin_neon_usi");
19328 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
19329 "__builtin_neon_udi");
19330
19331 /* Opaque integer types for structures of vectors. */
19332 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
19333 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
19334 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
19335 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
19336
19337 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
19338 "__builtin_neon_ti");
19339 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
19340 "__builtin_neon_ei");
19341 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
19342 "__builtin_neon_oi");
19343 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
19344 "__builtin_neon_ci");
19345 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
19346 "__builtin_neon_xi");
19347
19348 /* Pointers to vector types. */
19349 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
19350 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
19351 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
19352 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
19353 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
19354 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
19355 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
19356 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
19357 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
19358
19359 /* Operations which return results as pairs. */
19360 void_ftype_pv8qi_v8qi_v8qi =
19361 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
19362 V8QI_type_node, NULL);
19363 void_ftype_pv4hi_v4hi_v4hi =
19364 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
19365 V4HI_type_node, NULL);
19366 void_ftype_pv2si_v2si_v2si =
19367 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
19368 V2SI_type_node, NULL);
19369 void_ftype_pv2sf_v2sf_v2sf =
19370 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
19371 V2SF_type_node, NULL);
19372 void_ftype_pdi_di_di =
19373 build_function_type_list (void_type_node, intDI_pointer_node,
19374 neon_intDI_type_node, neon_intDI_type_node, NULL);
19375 void_ftype_pv16qi_v16qi_v16qi =
19376 build_function_type_list (void_type_node, V16QI_pointer_node,
19377 V16QI_type_node, V16QI_type_node, NULL);
19378 void_ftype_pv8hi_v8hi_v8hi =
19379 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
19380 V8HI_type_node, NULL);
19381 void_ftype_pv4si_v4si_v4si =
19382 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
19383 V4SI_type_node, NULL);
19384 void_ftype_pv4sf_v4sf_v4sf =
19385 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
19386 V4SF_type_node, NULL);
19387 void_ftype_pv2di_v2di_v2di =
19388 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
19389 V2DI_type_node, NULL);
19390
19391 dreg_types[0] = V8QI_type_node;
19392 dreg_types[1] = V4HI_type_node;
19393 dreg_types[2] = V2SI_type_node;
19394 dreg_types[3] = V2SF_type_node;
19395 dreg_types[4] = neon_intDI_type_node;
19396
19397 qreg_types[0] = V16QI_type_node;
19398 qreg_types[1] = V8HI_type_node;
19399 qreg_types[2] = V4SI_type_node;
19400 qreg_types[3] = V4SF_type_node;
19401 qreg_types[4] = V2DI_type_node;
19402
19403 for (i = 0; i < 5; i++)
19404 {
19405 int j;
19406 for (j = 0; j < 5; j++)
19407 {
19408 reinterp_ftype_dreg[i][j]
19409 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
19410 reinterp_ftype_qreg[i][j]
19411 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
19412 }
19413 }
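/* Illustrative example of the tables built above: reinterp_ftype_dreg[0][1]
   is the function type "V8QI (V4HI)"; the NEON_REINTERP case below selects
   it for the builtin that reinterprets a V4HI vector as V8QI, which ends up
   registered as __builtin_neon_vreinterpretv8qiv4hi.  */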
19414
19415 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
19416 i < ARRAY_SIZE (neon_builtin_data);
19417 i++, fcode++)
19418 {
19419 neon_builtin_datum *d = &neon_builtin_data[i];
19420
19421 const char* const modenames[] = {
19422 "v8qi", "v4hi", "v2si", "v2sf", "di",
19423 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
19424 "ti", "ei", "oi"
19425 };
19426 char namebuf[60];
19427 tree ftype = NULL;
19428 int is_load = 0, is_store = 0;
19429
19430 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
19431
19432 d->fcode = fcode;
19433
19434 switch (d->itype)
19435 {
19436 case NEON_LOAD1:
19437 case NEON_LOAD1LANE:
19438 case NEON_LOADSTRUCT:
19439 case NEON_LOADSTRUCTLANE:
19440 is_load = 1;
19441 /* Fall through. */
19442 case NEON_STORE1:
19443 case NEON_STORE1LANE:
19444 case NEON_STORESTRUCT:
19445 case NEON_STORESTRUCTLANE:
19446 if (!is_load)
19447 is_store = 1;
19448 /* Fall through. */
19449 case NEON_UNOP:
19450 case NEON_BINOP:
19451 case NEON_LOGICBINOP:
19452 case NEON_SHIFTINSERT:
19453 case NEON_TERNOP:
19454 case NEON_GETLANE:
19455 case NEON_SETLANE:
19456 case NEON_CREATE:
19457 case NEON_DUP:
19458 case NEON_DUPLANE:
19459 case NEON_SHIFTIMM:
19460 case NEON_SHIFTACC:
19461 case NEON_COMBINE:
19462 case NEON_SPLIT:
19463 case NEON_CONVERT:
19464 case NEON_FIXCONV:
19465 case NEON_LANEMUL:
19466 case NEON_LANEMULL:
19467 case NEON_LANEMULH:
19468 case NEON_LANEMAC:
19469 case NEON_SCALARMUL:
19470 case NEON_SCALARMULL:
19471 case NEON_SCALARMULH:
19472 case NEON_SCALARMAC:
19473 case NEON_SELECT:
19474 case NEON_VTBL:
19475 case NEON_VTBX:
19476 {
19477 int k;
19478 tree return_type = void_type_node, args = void_list_node;
19479
19480 /* Build a function type directly from the insn_data for
19481 this builtin. The build_function_type() function takes
19482 care of removing duplicates for us. */
19483 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
19484 {
19485 tree eltype;
19486
19487 if (is_load && k == 1)
19488 {
19489 /* Neon load patterns always have the memory
19490 operand in the operand 1 position. */
19491 gcc_assert (insn_data[d->code].operand[k].predicate
19492 == neon_struct_operand);
19493
19494 switch (d->mode)
19495 {
19496 case T_V8QI:
19497 case T_V16QI:
19498 eltype = const_intQI_pointer_node;
19499 break;
19500
19501 case T_V4HI:
19502 case T_V8HI:
19503 eltype = const_intHI_pointer_node;
19504 break;
19505
19506 case T_V2SI:
19507 case T_V4SI:
19508 eltype = const_intSI_pointer_node;
19509 break;
19510
19511 case T_V2SF:
19512 case T_V4SF:
19513 eltype = const_float_pointer_node;
19514 break;
19515
19516 case T_DI:
19517 case T_V2DI:
19518 eltype = const_intDI_pointer_node;
19519 break;
19520
19521 default: gcc_unreachable ();
19522 }
19523 }
19524 else if (is_store && k == 0)
19525 {
19526 /* Similarly, Neon store patterns use operand 0 as
19527 the memory location to store to. */
19528 gcc_assert (insn_data[d->code].operand[k].predicate
19529 == neon_struct_operand);
19530
19531 switch (d->mode)
19532 {
19533 case T_V8QI:
19534 case T_V16QI:
19535 eltype = intQI_pointer_node;
19536 break;
19537
19538 case T_V4HI:
19539 case T_V8HI:
19540 eltype = intHI_pointer_node;
19541 break;
19542
19543 case T_V2SI:
19544 case T_V4SI:
19545 eltype = intSI_pointer_node;
19546 break;
19547
19548 case T_V2SF:
19549 case T_V4SF:
19550 eltype = float_pointer_node;
19551 break;
19552
19553 case T_DI:
19554 case T_V2DI:
19555 eltype = intDI_pointer_node;
19556 break;
19557
19558 default: gcc_unreachable ();
19559 }
19560 }
19561 else
19562 {
19563 switch (insn_data[d->code].operand[k].mode)
19564 {
19565 case VOIDmode: eltype = void_type_node; break;
19566 /* Scalars. */
19567 case QImode: eltype = neon_intQI_type_node; break;
19568 case HImode: eltype = neon_intHI_type_node; break;
19569 case SImode: eltype = neon_intSI_type_node; break;
19570 case SFmode: eltype = neon_float_type_node; break;
19571 case DImode: eltype = neon_intDI_type_node; break;
19572 case TImode: eltype = intTI_type_node; break;
19573 case EImode: eltype = intEI_type_node; break;
19574 case OImode: eltype = intOI_type_node; break;
19575 case CImode: eltype = intCI_type_node; break;
19576 case XImode: eltype = intXI_type_node; break;
19577 /* 64-bit vectors. */
19578 case V8QImode: eltype = V8QI_type_node; break;
19579 case V4HImode: eltype = V4HI_type_node; break;
19580 case V2SImode: eltype = V2SI_type_node; break;
19581 case V2SFmode: eltype = V2SF_type_node; break;
19582 /* 128-bit vectors. */
19583 case V16QImode: eltype = V16QI_type_node; break;
19584 case V8HImode: eltype = V8HI_type_node; break;
19585 case V4SImode: eltype = V4SI_type_node; break;
19586 case V4SFmode: eltype = V4SF_type_node; break;
19587 case V2DImode: eltype = V2DI_type_node; break;
19588 default: gcc_unreachable ();
19589 }
19590 }
19591
19592 if (k == 0 && !is_store)
19593 return_type = eltype;
19594 else
19595 args = tree_cons (NULL_TREE, eltype, args);
19596 }
19597
19598 ftype = build_function_type (return_type, args);
19599 }
19600 break;
19601
19602 case NEON_RESULTPAIR:
19603 {
19604 switch (insn_data[d->code].operand[1].mode)
19605 {
19606 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
19607 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
19608 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
19609 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
19610 case DImode: ftype = void_ftype_pdi_di_di; break;
19611 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
19612 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
19613 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
19614 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
19615 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
19616 default: gcc_unreachable ();
19617 }
19618 }
19619 break;
19620
19621 case NEON_REINTERP:
19622 {
19623 /* We iterate over 5 doubleword types, then 5 quadword
19624 types. */
19625 int rhs = d->mode % 5;
19626 switch (insn_data[d->code].operand[0].mode)
19627 {
19628 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
19629 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
19630 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
19631 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
19632 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
19633 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
19634 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
19635 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
19636 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
19637 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
19638 default: gcc_unreachable ();
19639 }
19640 }
19641 break;
19642
19643 default:
19644 gcc_unreachable ();
19645 }
19646
19647 gcc_assert (ftype != NULL);
19648
19649 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
19650
19651 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
19652 NULL_TREE);
19653 arm_builtin_decls[fcode] = decl;
19654 }
19655 }
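/* For illustration of the naming scheme used in the loop above: the v8qi
   variant of the VAR3 (BINOP, vaddl, ...) entry becomes the builtin
   "__builtin_neon_vaddlv8qi", whose type is built from the insn_data of
   CODE_FOR_neon_vaddlv8qi; the arm_neon.h intrinsics (e.g. vaddl_s8) are
   normally thin wrappers around builtins named this way.  */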
19656
19657 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
19658 do \
19659 { \
19660 if ((MASK) & insn_flags) \
19661 { \
19662 tree bdecl; \
19663 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
19664 BUILT_IN_MD, NULL, NULL_TREE); \
19665 arm_builtin_decls[CODE] = bdecl; \
19666 } \
19667 } \
19668 while (0)
19669
19670 struct builtin_description
19671 {
19672 const unsigned int mask;
19673 const enum insn_code icode;
19674 const char * const name;
19675 const enum arm_builtins code;
19676 const enum rtx_code comparison;
19677 const unsigned int flag;
19678 };
19679
19680 static const struct builtin_description bdesc_2arg[] =
19681 {
19682 #define IWMMXT_BUILTIN(code, string, builtin) \
19683 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
19684 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19685
19686 #define IWMMXT2_BUILTIN(code, string, builtin) \
19687 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
19688 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19689
19690 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
19691 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
19692 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
19693 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
19694 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
19695 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
19696 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
19697 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
19698 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
19699 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
19700 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
19701 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
19702 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
19703 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
19704 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
19705 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
19706 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
19707 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
19708 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
19709 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
19710 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
19711 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
19712 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
19713 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
19714 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
19715 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
19716 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
19717 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
19718 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
19719 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
19720 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
19721 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
19722 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
19723 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
19724 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
19725 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
19726 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
19727 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
19728 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
19729 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
19730 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
19731 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
19732 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
19733 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
19734 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
19735 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
19736 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
19737 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
19738 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
19739 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
19740 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
19741 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
19742 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
19743 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
19744 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
19745 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
19746 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
19747 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
19748 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
19749 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
19750 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
19751 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
19752 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
19753 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
19754 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
19755 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
19756 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
19757 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
19758 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
19759 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
19760 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
19761 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
19762 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
19763 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
19764 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
19765 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
19766 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
19767 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
19768
19769 #define IWMMXT_BUILTIN2(code, builtin) \
19770 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19771
19772 #define IWMMXT2_BUILTIN2(code, builtin) \
19773 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19774
19775 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
19776 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
19777 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
19778 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
19779 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
19780 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
19781 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
19782 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
19783 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
19784 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
19785 };
19786
19787 static const struct builtin_description bdesc_1arg[] =
19788 {
19789 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
19790 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
19791 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
19792 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
19793 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
19794 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
19795 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
19796 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
19797 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
19798 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
19799 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
19800 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
19801 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
19802 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
19803 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
19804 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
19805 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
19806 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
19807 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
19808 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
19809 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
19810 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
19811 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
19812 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
19813 };
19814
19815 /* Set up all the iWMMXt builtins. This is not called if
19816 TARGET_IWMMXT is zero. */
19817
19818 static void
19819 arm_init_iwmmxt_builtins (void)
19820 {
19821 const struct builtin_description * d;
19822 size_t i;
19823
19824 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
19825 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
19826 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
19827
19828 tree v8qi_ftype_v8qi_v8qi_int
19829 = build_function_type_list (V8QI_type_node,
19830 V8QI_type_node, V8QI_type_node,
19831 integer_type_node, NULL_TREE);
19832 tree v4hi_ftype_v4hi_int
19833 = build_function_type_list (V4HI_type_node,
19834 V4HI_type_node, integer_type_node, NULL_TREE);
19835 tree v2si_ftype_v2si_int
19836 = build_function_type_list (V2SI_type_node,
19837 V2SI_type_node, integer_type_node, NULL_TREE);
19838 tree v2si_ftype_di_di
19839 = build_function_type_list (V2SI_type_node,
19840 long_long_integer_type_node,
19841 long_long_integer_type_node,
19842 NULL_TREE);
19843 tree di_ftype_di_int
19844 = build_function_type_list (long_long_integer_type_node,
19845 long_long_integer_type_node,
19846 integer_type_node, NULL_TREE);
19847 tree di_ftype_di_int_int
19848 = build_function_type_list (long_long_integer_type_node,
19849 long_long_integer_type_node,
19850 integer_type_node,
19851 integer_type_node, NULL_TREE);
19852 tree int_ftype_v8qi
19853 = build_function_type_list (integer_type_node,
19854 V8QI_type_node, NULL_TREE);
19855 tree int_ftype_v4hi
19856 = build_function_type_list (integer_type_node,
19857 V4HI_type_node, NULL_TREE);
19858 tree int_ftype_v2si
19859 = build_function_type_list (integer_type_node,
19860 V2SI_type_node, NULL_TREE);
19861 tree int_ftype_v8qi_int
19862 = build_function_type_list (integer_type_node,
19863 V8QI_type_node, integer_type_node, NULL_TREE);
19864 tree int_ftype_v4hi_int
19865 = build_function_type_list (integer_type_node,
19866 V4HI_type_node, integer_type_node, NULL_TREE);
19867 tree int_ftype_v2si_int
19868 = build_function_type_list (integer_type_node,
19869 V2SI_type_node, integer_type_node, NULL_TREE);
19870 tree v8qi_ftype_v8qi_int_int
19871 = build_function_type_list (V8QI_type_node,
19872 V8QI_type_node, integer_type_node,
19873 integer_type_node, NULL_TREE);
19874 tree v4hi_ftype_v4hi_int_int
19875 = build_function_type_list (V4HI_type_node,
19876 V4HI_type_node, integer_type_node,
19877 integer_type_node, NULL_TREE);
19878 tree v2si_ftype_v2si_int_int
19879 = build_function_type_list (V2SI_type_node,
19880 V2SI_type_node, integer_type_node,
19881 integer_type_node, NULL_TREE);
19882 /* Miscellaneous. */
19883 tree v8qi_ftype_v4hi_v4hi
19884 = build_function_type_list (V8QI_type_node,
19885 V4HI_type_node, V4HI_type_node, NULL_TREE);
19886 tree v4hi_ftype_v2si_v2si
19887 = build_function_type_list (V4HI_type_node,
19888 V2SI_type_node, V2SI_type_node, NULL_TREE);
19889 tree v8qi_ftype_v4hi_v8qi
19890 = build_function_type_list (V8QI_type_node,
19891 V4HI_type_node, V8QI_type_node, NULL_TREE);
19892 tree v2si_ftype_v4hi_v4hi
19893 = build_function_type_list (V2SI_type_node,
19894 V4HI_type_node, V4HI_type_node, NULL_TREE);
19895 tree v2si_ftype_v8qi_v8qi
19896 = build_function_type_list (V2SI_type_node,
19897 V8QI_type_node, V8QI_type_node, NULL_TREE);
19898 tree v4hi_ftype_v4hi_di
19899 = build_function_type_list (V4HI_type_node,
19900 V4HI_type_node, long_long_integer_type_node,
19901 NULL_TREE);
19902 tree v2si_ftype_v2si_di
19903 = build_function_type_list (V2SI_type_node,
19904 V2SI_type_node, long_long_integer_type_node,
19905 NULL_TREE);
19906 tree di_ftype_void
19907 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
19908 tree int_ftype_void
19909 = build_function_type_list (integer_type_node, NULL_TREE);
19910 tree di_ftype_v8qi
19911 = build_function_type_list (long_long_integer_type_node,
19912 V8QI_type_node, NULL_TREE);
19913 tree di_ftype_v4hi
19914 = build_function_type_list (long_long_integer_type_node,
19915 V4HI_type_node, NULL_TREE);
19916 tree di_ftype_v2si
19917 = build_function_type_list (long_long_integer_type_node,
19918 V2SI_type_node, NULL_TREE);
19919 tree v2si_ftype_v4hi
19920 = build_function_type_list (V2SI_type_node,
19921 V4HI_type_node, NULL_TREE);
19922 tree v4hi_ftype_v8qi
19923 = build_function_type_list (V4HI_type_node,
19924 V8QI_type_node, NULL_TREE);
19925 tree v8qi_ftype_v8qi
19926 = build_function_type_list (V8QI_type_node,
19927 V8QI_type_node, NULL_TREE);
19928 tree v4hi_ftype_v4hi
19929 = build_function_type_list (V4HI_type_node,
19930 V4HI_type_node, NULL_TREE);
19931 tree v2si_ftype_v2si
19932 = build_function_type_list (V2SI_type_node,
19933 V2SI_type_node, NULL_TREE);
19934
19935 tree di_ftype_di_v4hi_v4hi
19936 = build_function_type_list (long_long_unsigned_type_node,
19937 long_long_unsigned_type_node,
19938 V4HI_type_node, V4HI_type_node,
19939 NULL_TREE);
19940
19941 tree di_ftype_v4hi_v4hi
19942 = build_function_type_list (long_long_unsigned_type_node,
19943 V4HI_type_node, V4HI_type_node,
19944 NULL_TREE);
19945
19946 tree v2si_ftype_v2si_v4hi_v4hi
19947 = build_function_type_list (V2SI_type_node,
19948 V2SI_type_node, V4HI_type_node,
19949 V4HI_type_node, NULL_TREE);
19950
19951 tree v2si_ftype_v2si_v8qi_v8qi
19952 = build_function_type_list (V2SI_type_node,
19953 V2SI_type_node, V8QI_type_node,
19954 V8QI_type_node, NULL_TREE);
19955
19956 tree di_ftype_di_v2si_v2si
19957 = build_function_type_list (long_long_unsigned_type_node,
19958 long_long_unsigned_type_node,
19959 V2SI_type_node, V2SI_type_node,
19960 NULL_TREE);
19961
19962 tree di_ftype_di_di_int
19963 = build_function_type_list (long_long_unsigned_type_node,
19964 long_long_unsigned_type_node,
19965 long_long_unsigned_type_node,
19966 integer_type_node, NULL_TREE);
19967
19968 tree void_ftype_int
19969 = build_function_type_list (void_type_node,
19970 integer_type_node, NULL_TREE);
19971
19972 tree v8qi_ftype_char
19973 = build_function_type_list (V8QI_type_node,
19974 signed_char_type_node, NULL_TREE);
19975
19976 tree v4hi_ftype_short
19977 = build_function_type_list (V4HI_type_node,
19978 short_integer_type_node, NULL_TREE);
19979
19980 tree v2si_ftype_int
19981 = build_function_type_list (V2SI_type_node,
19982 integer_type_node, NULL_TREE);
19983
19984 /* Normal vector binops. */
19985 tree v8qi_ftype_v8qi_v8qi
19986 = build_function_type_list (V8QI_type_node,
19987 V8QI_type_node, V8QI_type_node, NULL_TREE);
19988 tree v4hi_ftype_v4hi_v4hi
19989 = build_function_type_list (V4HI_type_node,
19990 V4HI_type_node, V4HI_type_node, NULL_TREE);
19991 tree v2si_ftype_v2si_v2si
19992 = build_function_type_list (V2SI_type_node,
19993 V2SI_type_node, V2SI_type_node, NULL_TREE);
19994 tree di_ftype_di_di
19995 = build_function_type_list (long_long_unsigned_type_node,
19996 long_long_unsigned_type_node,
19997 long_long_unsigned_type_node,
19998 NULL_TREE);
19999
20000 /* Add all builtins that are more or less simple operations on two
20001 operands. */
20002 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
20003 {
20004 /* Use one of the operands; the target can have a different mode for
20005 mask-generating compares. */
20006 enum machine_mode mode;
20007 tree type;
20008
20009 if (d->name == 0)
20010 continue;
20011
20012 mode = insn_data[d->icode].operand[1].mode;
20013
20014 switch (mode)
20015 {
20016 case V8QImode:
20017 type = v8qi_ftype_v8qi_v8qi;
20018 break;
20019 case V4HImode:
20020 type = v4hi_ftype_v4hi_v4hi;
20021 break;
20022 case V2SImode:
20023 type = v2si_ftype_v2si_v2si;
20024 break;
20025 case DImode:
20026 type = di_ftype_di_di;
20027 break;
20028
20029 default:
20030 gcc_unreachable ();
20031 }
20032
20033 def_mbuiltin (d->mask, d->name, type, d->code);
20034 }
20035
20036 /* Add the remaining MMX insns with somewhat more complicated types. */
20037 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
20038 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
20039 ARM_BUILTIN_ ## CODE)
20040
20041 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
20042 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
20043 ARM_BUILTIN_ ## CODE)
20044
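   /* For reference, the first registration below,

	iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);

      expands to

	def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
		      ARM_BUILTIN_WZERO);

      so each entry pairs a "__builtin_arm_" name with a function type and
      an ARM_BUILTIN_* code under the relevant feature flag.  */
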
20045 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
20046 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
20047 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
20048 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
20049 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
20050 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
20051 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
20052 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
20053 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
20054
20055 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
20056 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
20057 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
20058 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
20059 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
20060 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
20061
20062 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
20063 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
20064 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
20065 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
20066 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
20067 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
20068
20069 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
20070 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
20071 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
20072 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
20073 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
20074 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
20075
20076 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
20077 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
20078 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
20079 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
20080 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
20081 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
20082
20083 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
20084
20085 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
20086 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
20087 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
20088 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
20089 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
20090 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
20091 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
20092 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
20093 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
20094 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
20095
20096 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
20097 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
20098 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
20099 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
20100 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
20101 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
20102 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
20103 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
20104 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
20105
20106 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
20107 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
20108 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
20109
20110 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
20111 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
20112 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
20113
20114 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
20115 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
20116
20117 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
20118 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
20119 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
20120 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
20121 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
20122 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
20123
20124 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
20125 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
20126 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
20127 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
20128 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
20129 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
20130 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
20131 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
20132 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
20133 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
20134 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
20135 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
20136
20137 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
20138 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
20139 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
20140 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
20141
20142 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
20143 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
20144 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
20145 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
20146 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
20147 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
20148 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
20149
20150 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
20151 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
20152 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
20153
20154 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
20155 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
20156 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
20157 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
20158
20159 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
20160 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
20161 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
20162 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
20163
20164 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
20165 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
20166 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
20167 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
20168
20169 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
20170 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
20171 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
20172 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
20173
20174 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
20175 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
20176 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
20177 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
20178
20179 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
20180 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
20181 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
20182 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
20183
20184 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
20185
20186 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
20187 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
20188 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
20189
20190 #undef iwmmx_mbuiltin
20191 #undef iwmmx2_mbuiltin
20192 }
20193
20194 static void
20195 arm_init_tls_builtins (void)
20196 {
20197 tree ftype, decl;
20198
20199 ftype = build_function_type (ptr_type_node, void_list_node);
20200 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
20201 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
20202 NULL, NULL_TREE);
20203 TREE_NOTHROW (decl) = 1;
20204 TREE_READONLY (decl) = 1;
20205 arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
20206 }
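
/* A minimal user-level sketch of the builtin registered above (illustrative
   only; the function name get_tp is made up here):

     void *
     get_tp (void)
     {
       return __builtin_thread_pointer ();
     }

   The decl is marked TREE_NOTHROW and TREE_READONLY, so such calls are
   treated as having no side effects.  */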
20207
20208 static void
20209 arm_init_fp16_builtins (void)
20210 {
20211 tree fp16_type = make_node (REAL_TYPE);
20212 TYPE_PRECISION (fp16_type) = 16;
20213 layout_type (fp16_type);
20214 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
20215 }
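
/* A short sketch of how the __fp16 type registered above behaves at the
   source level, assuming a half-precision format has been selected
   (e.g. -mfp16-format=ieee) so that arm_fp16_format is set:

     __fp16 a = 1.0f, b = 2.0f;
     float  c = a + b;		// operands promoted to float; see
				// arm_promoted_type below

   Using __fp16 as a parameter or return type is rejected by the
   arm_invalid_parameter_type and arm_invalid_return_type hooks below.  */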
20216
20217 static void
20218 arm_init_builtins (void)
20219 {
20220 arm_init_tls_builtins ();
20221
20222 if (TARGET_REALLY_IWMMXT)
20223 arm_init_iwmmxt_builtins ();
20224
20225 if (TARGET_NEON)
20226 arm_init_neon_builtins ();
20227
20228 if (arm_fp16_format)
20229 arm_init_fp16_builtins ();
20230 }
20231
20232 /* Return the ARM builtin for CODE. */
20233
20234 static tree
20235 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
20236 {
20237 if (code >= ARM_BUILTIN_MAX)
20238 return error_mark_node;
20239
20240 return arm_builtin_decls[code];
20241 }
20242
20243 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
20244
20245 static const char *
20246 arm_invalid_parameter_type (const_tree t)
20247 {
20248 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20249 return N_("function parameters cannot have __fp16 type");
20250 return NULL;
20251 }
20252
20253 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
20254
20255 static const char *
20256 arm_invalid_return_type (const_tree t)
20257 {
20258 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20259 return N_("functions cannot return __fp16 type");
20260 return NULL;
20261 }
20262
20263 /* Implement TARGET_PROMOTED_TYPE. */
20264
20265 static tree
20266 arm_promoted_type (const_tree t)
20267 {
20268 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20269 return float_type_node;
20270 return NULL_TREE;
20271 }
20272
20273 /* Implement TARGET_CONVERT_TO_TYPE.
20274 Specifically, this hook implements the peculiarity of the ARM
20275 half-precision floating-point C semantics that requires conversions between
20276 __fp16 to or from double to do an intermediate conversion to float. */
20277
20278 static tree
20279 arm_convert_to_type (tree type, tree expr)
20280 {
20281 tree fromtype = TREE_TYPE (expr);
20282 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
20283 return NULL_TREE;
20284 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
20285 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
20286 return convert (type, convert (float_type_node, expr));
20287 return NULL_TREE;
20288 }
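
/* A worked example of the hook above: conversions between __fp16
   (precision 16) and double (precision 64) are rewritten to go through
   float, so conceptually

     __fp16 h;
     double d;
     d = h;			// treated as (double)(float) h
     h = d;			// treated as (__fp16)(float) d

   while conversions between __fp16 and float are left alone (the hook
   returns NULL_TREE for them).  */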
20289
20290 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
20291 This simply adds HFmode as a supported mode; even though we don't
20292 implement arithmetic on this type directly, it's supported by
20293 optabs conversions, much the way the double-word arithmetic is
20294 special-cased in the default hook. */
20295
20296 static bool
20297 arm_scalar_mode_supported_p (enum machine_mode mode)
20298 {
20299 if (mode == HFmode)
20300 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
20301 else if (ALL_FIXED_POINT_MODE_P (mode))
20302 return true;
20303 else
20304 return default_scalar_mode_supported_p (mode);
20305 }
20306
20307 /* Errors in the source file can cause expand_expr to return const0_rtx
20308 where we expect a vector. To avoid crashing, use one of the vector
20309 clear instructions. */
20310
20311 static rtx
20312 safe_vector_operand (rtx x, enum machine_mode mode)
20313 {
20314 if (x != const0_rtx)
20315 return x;
20316 x = gen_reg_rtx (mode);
20317
20318 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
20319 : gen_rtx_SUBREG (DImode, x, 0)));
20320 return x;
20321 }
20322
20323 /* Subroutine of arm_expand_builtin to take care of binop insns. */
20324
20325 static rtx
20326 arm_expand_binop_builtin (enum insn_code icode,
20327 tree exp, rtx target)
20328 {
20329 rtx pat;
20330 tree arg0 = CALL_EXPR_ARG (exp, 0);
20331 tree arg1 = CALL_EXPR_ARG (exp, 1);
20332 rtx op0 = expand_normal (arg0);
20333 rtx op1 = expand_normal (arg1);
20334 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20335 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20336 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20337
20338 if (VECTOR_MODE_P (mode0))
20339 op0 = safe_vector_operand (op0, mode0);
20340 if (VECTOR_MODE_P (mode1))
20341 op1 = safe_vector_operand (op1, mode1);
20342
20343 if (! target
20344 || GET_MODE (target) != tmode
20345 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20346 target = gen_reg_rtx (tmode);
20347
20348 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
20349 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
20350
20351 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20352 op0 = copy_to_mode_reg (mode0, op0);
20353 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20354 op1 = copy_to_mode_reg (mode1, op1);
20355
20356 pat = GEN_FCN (icode) (target, op0, op1);
20357 if (! pat)
20358 return 0;
20359 emit_insn (pat);
20360 return target;
20361 }
20362
20363 /* Subroutine of arm_expand_builtin to take care of unop insns. */
20364
20365 static rtx
20366 arm_expand_unop_builtin (enum insn_code icode,
20367 tree exp, rtx target, int do_load)
20368 {
20369 rtx pat;
20370 tree arg0 = CALL_EXPR_ARG (exp, 0);
20371 rtx op0 = expand_normal (arg0);
20372 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20373 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20374
20375 if (! target
20376 || GET_MODE (target) != tmode
20377 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20378 target = gen_reg_rtx (tmode);
20379 if (do_load)
20380 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20381 else
20382 {
20383 if (VECTOR_MODE_P (mode0))
20384 op0 = safe_vector_operand (op0, mode0);
20385
20386 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20387 op0 = copy_to_mode_reg (mode0, op0);
20388 }
20389
20390 pat = GEN_FCN (icode) (target, op0);
20391 if (! pat)
20392 return 0;
20393 emit_insn (pat);
20394 return target;
20395 }
20396
20397 typedef enum {
20398 NEON_ARG_COPY_TO_REG,
20399 NEON_ARG_CONSTANT,
20400 NEON_ARG_MEMORY,
20401 NEON_ARG_STOP
20402 } builtin_arg;
20403
20404 #define NEON_MAX_BUILTIN_ARGS 5
20405
20406 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
20407 and return an expression for the accessed memory.
20408
20409 The intrinsic function operates on a block of registers that has
20410 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
20411 function references the memory at EXP of type TYPE and in mode
20412 MEM_MODE; this mode may be BLKmode if no more suitable mode is
20413 available. */
20414
20415 static tree
20416 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
20417 enum machine_mode reg_mode,
20418 neon_builtin_type_mode type_mode)
20419 {
20420 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
20421 tree elem_type, upper_bound, array_type;
20422
20423 /* Work out the size of the register block in bytes. */
20424 reg_size = GET_MODE_SIZE (reg_mode);
20425
20426 /* Work out the size of each vector in bytes. */
20427 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
20428 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
20429
20430 /* Work out how many vectors there are. */
20431 gcc_assert (reg_size % vector_size == 0);
20432 nvectors = reg_size / vector_size;
20433
20434 /* Work out the type of each element. */
20435 gcc_assert (POINTER_TYPE_P (type));
20436 elem_type = TREE_TYPE (type);
20437
20438 /* Work out how many elements are being loaded or stored.
20439 MEM_MODE == REG_MODE implies a one-to-one mapping between register
20440 and memory elements; anything else implies a lane load or store. */
20441 if (mem_mode == reg_mode)
20442 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
20443 else
20444 nelems = nvectors;
20445
20446 /* Create a type that describes the full access. */
20447 upper_bound = build_int_cst (size_type_node, nelems - 1);
20448 array_type = build_array_type (elem_type, build_index_type (upper_bound));
20449
20450 /* Dereference EXP using that type. */
20451 return fold_build2 (MEM_REF, array_type, exp,
20452 build_int_cst (build_pointer_type (array_type), 0));
20453 }
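
/* A worked example of the sizes above, assuming a full-width load of a
   single 128-bit quad register of 32-bit elements (so REG_MODE == MEM_MODE
   and the type is a Q-reg type):

     reg_size	 = 16 bytes
     vector_size = 16 bytes	(TB_QREG)
     nvectors	 = 1
     nelems	 = 16 * 1 / 4 = 4

   so EXP is dereferenced as a four-element array, which tells the alias
   machinery the true extent of the access.  For a lane load or store
   (MEM_MODE != REG_MODE), NELEMS is simply the number of vectors.  */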
20454
20455 /* Expand a Neon builtin. */
20456 static rtx
20457 arm_expand_neon_args (rtx target, int icode, int have_retval,
20458 neon_builtin_type_mode type_mode,
20459 tree exp, int fcode, ...)
20460 {
20461 va_list ap;
20462 rtx pat;
20463 tree arg[NEON_MAX_BUILTIN_ARGS];
20464 rtx op[NEON_MAX_BUILTIN_ARGS];
20465 tree arg_type;
20466 tree formals;
20467 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20468 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
20469 enum machine_mode other_mode;
20470 int argc = 0;
20471 int opno;
20472
20473 if (have_retval
20474 && (!target
20475 || GET_MODE (target) != tmode
20476 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
20477 target = gen_reg_rtx (tmode);
20478
20479 va_start (ap, fcode);
20480
20481 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
20482
20483 for (;;)
20484 {
20485 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
20486
20487 if (thisarg == NEON_ARG_STOP)
20488 break;
20489 else
20490 {
20491 opno = argc + have_retval;
20492 mode[argc] = insn_data[icode].operand[opno].mode;
20493 arg[argc] = CALL_EXPR_ARG (exp, argc);
20494 arg_type = TREE_VALUE (formals);
20495 if (thisarg == NEON_ARG_MEMORY)
20496 {
20497 other_mode = insn_data[icode].operand[1 - opno].mode;
20498 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
20499 mode[argc], other_mode,
20500 type_mode);
20501 }
20502
20503 op[argc] = expand_normal (arg[argc]);
20504
20505 switch (thisarg)
20506 {
20507 case NEON_ARG_COPY_TO_REG:
20508 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
20509 if (!(*insn_data[icode].operand[opno].predicate)
20510 (op[argc], mode[argc]))
20511 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
20512 break;
20513
20514 case NEON_ARG_CONSTANT:
20515 /* FIXME: This error message is somewhat unhelpful. */
20516 if (!(*insn_data[icode].operand[opno].predicate)
20517 (op[argc], mode[argc]))
20518 error ("argument must be a constant");
20519 break;
20520
20521 case NEON_ARG_MEMORY:
20522 gcc_assert (MEM_P (op[argc]));
20523 PUT_MODE (op[argc], mode[argc]);
20524 /* ??? arm_neon.h uses the same built-in functions for signed
20525 and unsigned accesses, casting where necessary. This isn't
20526 alias safe. */
20527 set_mem_alias_set (op[argc], 0);
20528 if (!(*insn_data[icode].operand[opno].predicate)
20529 (op[argc], mode[argc]))
20530 op[argc] = (replace_equiv_address
20531 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
20532 break;
20533
20534 case NEON_ARG_STOP:
20535 gcc_unreachable ();
20536 }
20537
20538 argc++;
20539 formals = TREE_CHAIN (formals);
20540 }
20541 }
20542
20543 va_end (ap);
20544
20545 if (have_retval)
20546 switch (argc)
20547 {
20548 case 1:
20549 pat = GEN_FCN (icode) (target, op[0]);
20550 break;
20551
20552 case 2:
20553 pat = GEN_FCN (icode) (target, op[0], op[1]);
20554 break;
20555
20556 case 3:
20557 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
20558 break;
20559
20560 case 4:
20561 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
20562 break;
20563
20564 case 5:
20565 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
20566 break;
20567
20568 default:
20569 gcc_unreachable ();
20570 }
20571 else
20572 switch (argc)
20573 {
20574 case 1:
20575 pat = GEN_FCN (icode) (op[0]);
20576 break;
20577
20578 case 2:
20579 pat = GEN_FCN (icode) (op[0], op[1]);
20580 break;
20581
20582 case 3:
20583 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
20584 break;
20585
20586 case 4:
20587 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
20588 break;
20589
20590 case 5:
20591 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
20592 break;
20593
20594 default:
20595 gcc_unreachable ();
20596 }
20597
20598 if (!pat)
20599 return 0;
20600
20601 emit_insn (pat);
20602
20603 return target;
20604 }
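
/* A sketch of the varargs protocol used above.  A caller such as the
   NEON_BINOP case below passes

     NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
     NEON_ARG_STOP

   so ARGC ends up as 3: the two vector operands are forced into registers
   if necessary, the third must already satisfy its predicate (otherwise
   "argument must be a constant" is reported), and the pattern is generated
   as GEN_FCN (icode) (target, op[0], op[1], op[2]).  */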
20605
20606 /* Expand a Neon builtin. These are "special" because they don't have symbolic
20607 constants defined per-instruction or per instruction-variant. Instead, the
20608 required info is looked up in the table neon_builtin_data. */
20609 static rtx
20610 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
20611 {
20612 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
20613 neon_itype itype = d->itype;
20614 enum insn_code icode = d->code;
20615 neon_builtin_type_mode type_mode = d->mode;
20616
20617 switch (itype)
20618 {
20619 case NEON_UNOP:
20620 case NEON_CONVERT:
20621 case NEON_DUPLANE:
20622 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20623 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20624
20625 case NEON_BINOP:
20626 case NEON_SETLANE:
20627 case NEON_SCALARMUL:
20628 case NEON_SCALARMULL:
20629 case NEON_SCALARMULH:
20630 case NEON_SHIFTINSERT:
20631 case NEON_LOGICBINOP:
20632 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20633 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20634 NEON_ARG_STOP);
20635
20636 case NEON_TERNOP:
20637 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20638 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20639 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20640
20641 case NEON_GETLANE:
20642 case NEON_FIXCONV:
20643 case NEON_SHIFTIMM:
20644 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20645 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
20646 NEON_ARG_STOP);
20647
20648 case NEON_CREATE:
20649 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20650 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20651
20652 case NEON_DUP:
20653 case NEON_SPLIT:
20654 case NEON_REINTERP:
20655 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20656 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20657
20658 case NEON_COMBINE:
20659 case NEON_VTBL:
20660 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20661 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20662
20663 case NEON_RESULTPAIR:
20664 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
20665 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20666 NEON_ARG_STOP);
20667
20668 case NEON_LANEMUL:
20669 case NEON_LANEMULL:
20670 case NEON_LANEMULH:
20671 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20672 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20673 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20674
20675 case NEON_LANEMAC:
20676 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20677 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20678 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20679
20680 case NEON_SHIFTACC:
20681 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20682 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20683 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20684
20685 case NEON_SCALARMAC:
20686 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20687 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20688 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20689
20690 case NEON_SELECT:
20691 case NEON_VTBX:
20692 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20693 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20694 NEON_ARG_STOP);
20695
20696 case NEON_LOAD1:
20697 case NEON_LOADSTRUCT:
20698 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20699 NEON_ARG_MEMORY, NEON_ARG_STOP);
20700
20701 case NEON_LOAD1LANE:
20702 case NEON_LOADSTRUCTLANE:
20703 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20704 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20705 NEON_ARG_STOP);
20706
20707 case NEON_STORE1:
20708 case NEON_STORESTRUCT:
20709 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
20710 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20711
20712 case NEON_STORE1LANE:
20713 case NEON_STORESTRUCTLANE:
20714 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
20715 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20716 NEON_ARG_STOP);
20717 }
20718
20719 gcc_unreachable ();
20720 }
20721
20722 /* Emit code to reinterpret one Neon type as another, without altering bits. */
20723 void
20724 neon_reinterpret (rtx dest, rtx src)
20725 {
20726 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
20727 }
20728
20729 /* Emit code to place a Neon pair result in memory locations (with equal
20730 registers). */
20731 void
20732 neon_emit_pair_result_insn (enum machine_mode mode,
20733 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
20734 rtx op1, rtx op2)
20735 {
20736 rtx mem = gen_rtx_MEM (mode, destaddr);
20737 rtx tmp1 = gen_reg_rtx (mode);
20738 rtx tmp2 = gen_reg_rtx (mode);
20739
20740 emit_insn (intfn (tmp1, op1, op2, tmp2));
20741
20742 emit_move_insn (mem, tmp1);
20743 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
20744 emit_move_insn (mem, tmp2);
20745 }
20746
20747 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
20748 not to early-clobber SRC registers in the process.
20749
20750 We assume that the operands described by SRC and DEST represent a
20751 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
20752 number of components into which the copy has been decomposed. */
20753 void
20754 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
20755 {
20756 unsigned int i;
20757
20758 if (!reg_overlap_mentioned_p (operands[0], operands[1])
20759 || REGNO (operands[0]) < REGNO (operands[1]))
20760 {
20761 for (i = 0; i < count; i++)
20762 {
20763 operands[2 * i] = dest[i];
20764 operands[2 * i + 1] = src[i];
20765 }
20766 }
20767 else
20768 {
20769 for (i = 0; i < count; i++)
20770 {
20771 operands[2 * i] = dest[count - i - 1];
20772 operands[2 * i + 1] = src[count - i - 1];
20773 }
20774 }
20775 }
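
/* A small worked example of the ordering above.  With COUNT == 2,
   DEST = { d4, d5 } and SRC = { d3, d4 }, the destination overlaps the
   source and REGNO (operands[0]) > REGNO (operands[1]), so the copies are
   laid out high-to-low:

     d5 <- d4
     d4 <- d3

   which avoids clobbering d4 before it has been read.  With no overlap,
   or when the destination starts below the source, the natural low-to-high
   order is used instead.  */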
20776
20777 /* Split operands into moves from op[1] + op[2] into op[0]. */
20778
20779 void
20780 neon_split_vcombine (rtx operands[3])
20781 {
20782 unsigned int dest = REGNO (operands[0]);
20783 unsigned int src1 = REGNO (operands[1]);
20784 unsigned int src2 = REGNO (operands[2]);
20785 enum machine_mode halfmode = GET_MODE (operands[1]);
20786 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
20787 rtx destlo, desthi;
20788
20789 if (src1 == dest && src2 == dest + halfregs)
20790 {
20791 /* No-op move. Can't split to nothing; emit something. */
20792 emit_note (NOTE_INSN_DELETED);
20793 return;
20794 }
20795
20796 /* Preserve register attributes for variable tracking. */
20797 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
20798 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
20799 GET_MODE_SIZE (halfmode));
20800
20801 /* Special case of reversed high/low parts. Use VSWP. */
20802 if (src2 == dest && src1 == dest + halfregs)
20803 {
20804 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
20805 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
20806 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
20807 return;
20808 }
20809
20810 if (!reg_overlap_mentioned_p (operands[2], destlo))
20811 {
20812 /* Try to avoid unnecessary moves if part of the result
20813 is in the right place already. */
20814 if (src1 != dest)
20815 emit_move_insn (destlo, operands[1]);
20816 if (src2 != dest + halfregs)
20817 emit_move_insn (desthi, operands[2]);
20818 }
20819 else
20820 {
20821 if (src2 != dest + halfregs)
20822 emit_move_insn (desthi, operands[2]);
20823 if (src1 != dest)
20824 emit_move_insn (destlo, operands[1]);
20825 }
20826 }
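
/* An illustration of the cases above, writing the destination Q register
   as the D-register pair (lo, hi):

     src1 == lo && src2 == hi	-> nothing to move; only a deleted-insn
				   note is emitted
     src2 == lo && src1 == hi	-> both halves are exchanged; a single
				   two-SET parallel is emitted (VSWP)
     otherwise			-> at most two ordinary moves, ordered so
				   the first never overwrites a source the
				   second still needs.  */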
20827
20828 /* Expand an expression EXP that calls a built-in function,
20829 with result going to TARGET if that's convenient
20830 (and in mode MODE if that's convenient).
20831 SUBTARGET may be used as the target for computing one of EXP's operands.
20832 IGNORE is nonzero if the value is to be ignored. */
20833
20834 static rtx
20835 arm_expand_builtin (tree exp,
20836 rtx target,
20837 rtx subtarget ATTRIBUTE_UNUSED,
20838 enum machine_mode mode ATTRIBUTE_UNUSED,
20839 int ignore ATTRIBUTE_UNUSED)
20840 {
20841 const struct builtin_description * d;
20842 enum insn_code icode;
20843 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20844 tree arg0;
20845 tree arg1;
20846 tree arg2;
20847 rtx op0;
20848 rtx op1;
20849 rtx op2;
20850 rtx pat;
20851 int fcode = DECL_FUNCTION_CODE (fndecl);
20852 size_t i;
20853 enum machine_mode tmode;
20854 enum machine_mode mode0;
20855 enum machine_mode mode1;
20856 enum machine_mode mode2;
20857 int opint;
20858 int selector;
20859 int mask;
20860 int imm;
20861
20862 if (fcode >= ARM_BUILTIN_NEON_BASE)
20863 return arm_expand_neon_builtin (fcode, exp, target);
20864
20865 switch (fcode)
20866 {
20867 case ARM_BUILTIN_TEXTRMSB:
20868 case ARM_BUILTIN_TEXTRMUB:
20869 case ARM_BUILTIN_TEXTRMSH:
20870 case ARM_BUILTIN_TEXTRMUH:
20871 case ARM_BUILTIN_TEXTRMSW:
20872 case ARM_BUILTIN_TEXTRMUW:
20873 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
20874 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
20875 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
20876 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
20877 : CODE_FOR_iwmmxt_textrmw);
20878
20879 arg0 = CALL_EXPR_ARG (exp, 0);
20880 arg1 = CALL_EXPR_ARG (exp, 1);
20881 op0 = expand_normal (arg0);
20882 op1 = expand_normal (arg1);
20883 tmode = insn_data[icode].operand[0].mode;
20884 mode0 = insn_data[icode].operand[1].mode;
20885 mode1 = insn_data[icode].operand[2].mode;
20886
20887 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20888 op0 = copy_to_mode_reg (mode0, op0);
20889 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20890 {
20891 /* @@@ better error message */
20892 error ("selector must be an immediate");
20893 return gen_reg_rtx (tmode);
20894 }
20895
20896 opint = INTVAL (op1);
20897 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
20898 {
20899 if (opint > 7 || opint < 0)
20900 error ("the range of selector should be in 0 to 7");
20901 }
20902 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
20903 {
20904 if (opint > 3 || opint < 0)
20905 error ("the range of selector should be in 0 to 3");
20906 }
20907 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
20908 {
20909 if (opint > 1 || opint < 0)
20910 error ("the range of selector should be in 0 to 1");
20911 }
20912
20913 if (target == 0
20914 || GET_MODE (target) != tmode
20915 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20916 target = gen_reg_rtx (tmode);
20917 pat = GEN_FCN (icode) (target, op0, op1);
20918 if (! pat)
20919 return 0;
20920 emit_insn (pat);
20921 return target;
20922
20923 case ARM_BUILTIN_WALIGNI:
20924 /* If op2 is immediate, call waligni, else call walignr. */
20925 arg0 = CALL_EXPR_ARG (exp, 0);
20926 arg1 = CALL_EXPR_ARG (exp, 1);
20927 arg2 = CALL_EXPR_ARG (exp, 2);
20928 op0 = expand_normal (arg0);
20929 op1 = expand_normal (arg1);
20930 op2 = expand_normal (arg2);
20931 if (CONST_INT_P (op2))
20932 {
20933 icode = CODE_FOR_iwmmxt_waligni;
20934 tmode = insn_data[icode].operand[0].mode;
20935 mode0 = insn_data[icode].operand[1].mode;
20936 mode1 = insn_data[icode].operand[2].mode;
20937 mode2 = insn_data[icode].operand[3].mode;
20938 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
20939 op0 = copy_to_mode_reg (mode0, op0);
20940 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
20941 op1 = copy_to_mode_reg (mode1, op1);
20942 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
20943 selector = INTVAL (op2);
20944 if (selector > 7 || selector < 0)
20945 error ("the range of selector should be in 0 to 7");
20946 }
20947 else
20948 {
20949 icode = CODE_FOR_iwmmxt_walignr;
20950 tmode = insn_data[icode].operand[0].mode;
20951 mode0 = insn_data[icode].operand[1].mode;
20952 mode1 = insn_data[icode].operand[2].mode;
20953 mode2 = insn_data[icode].operand[3].mode;
20954 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
20955 op0 = copy_to_mode_reg (mode0, op0);
20956 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
20957 op1 = copy_to_mode_reg (mode1, op1);
20958 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
20959 op2 = copy_to_mode_reg (mode2, op2);
20960 }
20961 if (target == 0
20962 || GET_MODE (target) != tmode
20963 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
20964 target = gen_reg_rtx (tmode);
20965 pat = GEN_FCN (icode) (target, op0, op1, op2);
20966 if (!pat)
20967 return 0;
20968 emit_insn (pat);
20969 return target;
20970
20971 case ARM_BUILTIN_TINSRB:
20972 case ARM_BUILTIN_TINSRH:
20973 case ARM_BUILTIN_TINSRW:
20974 case ARM_BUILTIN_WMERGE:
20975 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
20976 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
20977 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
20978 : CODE_FOR_iwmmxt_tinsrw);
20979 arg0 = CALL_EXPR_ARG (exp, 0);
20980 arg1 = CALL_EXPR_ARG (exp, 1);
20981 arg2 = CALL_EXPR_ARG (exp, 2);
20982 op0 = expand_normal (arg0);
20983 op1 = expand_normal (arg1);
20984 op2 = expand_normal (arg2);
20985 tmode = insn_data[icode].operand[0].mode;
20986 mode0 = insn_data[icode].operand[1].mode;
20987 mode1 = insn_data[icode].operand[2].mode;
20988 mode2 = insn_data[icode].operand[3].mode;
20989
20990 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20991 op0 = copy_to_mode_reg (mode0, op0);
20992 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20993 op1 = copy_to_mode_reg (mode1, op1);
20994 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20995 {
20996 error ("selector must be an immediate");
20997 return const0_rtx;
20998 }
20999 if (icode == CODE_FOR_iwmmxt_wmerge)
21000 {
21001 selector = INTVAL (op2);
21002 if (selector > 7 || selector < 0)
21003 error ("the range of selector should be in 0 to 7");
21004 }
21005 if ((icode == CODE_FOR_iwmmxt_tinsrb)
21006 || (icode == CODE_FOR_iwmmxt_tinsrh)
21007 || (icode == CODE_FOR_iwmmxt_tinsrw))
21008 {
21009 mask = 0x01;
21010 selector = INTVAL (op2);
21011 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
21012 error ("the range of selector should be in 0 to 7");
21013 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
21014 error ("the range of selector should be in 0 to 3");
21015 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
21016 error ("the range of selector should be in 0 to 1");
21017 mask <<= selector;
21018 op2 = GEN_INT (mask);
21019 }
21020 if (target == 0
21021 || GET_MODE (target) != tmode
21022 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21023 target = gen_reg_rtx (tmode);
21024 pat = GEN_FCN (icode) (target, op0, op1, op2);
21025 if (! pat)
21026 return 0;
21027 emit_insn (pat);
21028 return target;
21029
21030 case ARM_BUILTIN_SETWCGR0:
21031 case ARM_BUILTIN_SETWCGR1:
21032 case ARM_BUILTIN_SETWCGR2:
21033 case ARM_BUILTIN_SETWCGR3:
21034 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
21035 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
21036 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
21037 : CODE_FOR_iwmmxt_setwcgr3);
21038 arg0 = CALL_EXPR_ARG (exp, 0);
21039 op0 = expand_normal (arg0);
21040 mode0 = insn_data[icode].operand[0].mode;
21041 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
21042 op0 = copy_to_mode_reg (mode0, op0);
21043 pat = GEN_FCN (icode) (op0);
21044 if (!pat)
21045 return 0;
21046 emit_insn (pat);
21047 return 0;
21048
21049 case ARM_BUILTIN_GETWCGR0:
21050 case ARM_BUILTIN_GETWCGR1:
21051 case ARM_BUILTIN_GETWCGR2:
21052 case ARM_BUILTIN_GETWCGR3:
21053 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
21054 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
21055 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
21056 : CODE_FOR_iwmmxt_getwcgr3);
21057 tmode = insn_data[icode].operand[0].mode;
21058 if (target == 0
21059 || GET_MODE (target) != tmode
21060 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
21061 target = gen_reg_rtx (tmode);
21062 pat = GEN_FCN (icode) (target);
21063 if (!pat)
21064 return 0;
21065 emit_insn (pat);
21066 return target;
21067
21068 case ARM_BUILTIN_WSHUFH:
21069 icode = CODE_FOR_iwmmxt_wshufh;
21070 arg0 = CALL_EXPR_ARG (exp, 0);
21071 arg1 = CALL_EXPR_ARG (exp, 1);
21072 op0 = expand_normal (arg0);
21073 op1 = expand_normal (arg1);
21074 tmode = insn_data[icode].operand[0].mode;
21075 mode1 = insn_data[icode].operand[1].mode;
21076 mode2 = insn_data[icode].operand[2].mode;
21077
21078 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21079 op0 = copy_to_mode_reg (mode1, op0);
21080 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21081 {
21082 error ("mask must be an immediate");
21083 return const0_rtx;
21084 }
21085 selector = INTVAL (op1);
21086 if (selector < 0 || selector > 255)
21087 error ("the range of mask should be in 0 to 255");
21088 if (target == 0
21089 || GET_MODE (target) != tmode
21090 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21091 target = gen_reg_rtx (tmode);
21092 pat = GEN_FCN (icode) (target, op0, op1);
21093 if (! pat)
21094 return 0;
21095 emit_insn (pat);
21096 return target;
21097
21098 case ARM_BUILTIN_WMADDS:
21099 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
21100 case ARM_BUILTIN_WMADDSX:
21101 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
21102 case ARM_BUILTIN_WMADDSN:
21103 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
21104 case ARM_BUILTIN_WMADDU:
21105 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
21106 case ARM_BUILTIN_WMADDUX:
21107 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
21108 case ARM_BUILTIN_WMADDUN:
21109 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
21110 case ARM_BUILTIN_WSADBZ:
21111 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
21112 case ARM_BUILTIN_WSADHZ:
21113 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
21114
21115 /* Several three-argument builtins. */
21116 case ARM_BUILTIN_WMACS:
21117 case ARM_BUILTIN_WMACU:
21118 case ARM_BUILTIN_TMIA:
21119 case ARM_BUILTIN_TMIAPH:
21120 case ARM_BUILTIN_TMIATT:
21121 case ARM_BUILTIN_TMIATB:
21122 case ARM_BUILTIN_TMIABT:
21123 case ARM_BUILTIN_TMIABB:
21124 case ARM_BUILTIN_WQMIABB:
21125 case ARM_BUILTIN_WQMIABT:
21126 case ARM_BUILTIN_WQMIATB:
21127 case ARM_BUILTIN_WQMIATT:
21128 case ARM_BUILTIN_WQMIABBN:
21129 case ARM_BUILTIN_WQMIABTN:
21130 case ARM_BUILTIN_WQMIATBN:
21131 case ARM_BUILTIN_WQMIATTN:
21132 case ARM_BUILTIN_WMIABB:
21133 case ARM_BUILTIN_WMIABT:
21134 case ARM_BUILTIN_WMIATB:
21135 case ARM_BUILTIN_WMIATT:
21136 case ARM_BUILTIN_WMIABBN:
21137 case ARM_BUILTIN_WMIABTN:
21138 case ARM_BUILTIN_WMIATBN:
21139 case ARM_BUILTIN_WMIATTN:
21140 case ARM_BUILTIN_WMIAWBB:
21141 case ARM_BUILTIN_WMIAWBT:
21142 case ARM_BUILTIN_WMIAWTB:
21143 case ARM_BUILTIN_WMIAWTT:
21144 case ARM_BUILTIN_WMIAWBBN:
21145 case ARM_BUILTIN_WMIAWBTN:
21146 case ARM_BUILTIN_WMIAWTBN:
21147 case ARM_BUILTIN_WMIAWTTN:
21148 case ARM_BUILTIN_WSADB:
21149 case ARM_BUILTIN_WSADH:
21150 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
21151 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
21152 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
21153 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
21154 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
21155 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
21156 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
21157 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
21158 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
21159 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
21160 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
21161 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
21162 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
21163 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
21164 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
21165 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
21166 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
21167 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
21168 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
21169 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
21170 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
21171 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
21172 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
21173 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
21174 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
21175 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
21176 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
21177 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
21178 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
21179 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
21180 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
21181 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
21182 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
21183 : CODE_FOR_iwmmxt_wsadh);
21184 arg0 = CALL_EXPR_ARG (exp, 0);
21185 arg1 = CALL_EXPR_ARG (exp, 1);
21186 arg2 = CALL_EXPR_ARG (exp, 2);
21187 op0 = expand_normal (arg0);
21188 op1 = expand_normal (arg1);
21189 op2 = expand_normal (arg2);
21190 tmode = insn_data[icode].operand[0].mode;
21191 mode0 = insn_data[icode].operand[1].mode;
21192 mode1 = insn_data[icode].operand[2].mode;
21193 mode2 = insn_data[icode].operand[3].mode;
21194
21195 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21196 op0 = copy_to_mode_reg (mode0, op0);
21197 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21198 op1 = copy_to_mode_reg (mode1, op1);
21199 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21200 op2 = copy_to_mode_reg (mode2, op2);
21201 if (target == 0
21202 || GET_MODE (target) != tmode
21203 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21204 target = gen_reg_rtx (tmode);
21205 pat = GEN_FCN (icode) (target, op0, op1, op2);
21206 if (! pat)
21207 return 0;
21208 emit_insn (pat);
21209 return target;
21210
21211 case ARM_BUILTIN_WZERO:
21212 target = gen_reg_rtx (DImode);
21213 emit_insn (gen_iwmmxt_clrdi (target));
21214 return target;
21215
21216 case ARM_BUILTIN_WSRLHI:
21217 case ARM_BUILTIN_WSRLWI:
21218 case ARM_BUILTIN_WSRLDI:
21219 case ARM_BUILTIN_WSLLHI:
21220 case ARM_BUILTIN_WSLLWI:
21221 case ARM_BUILTIN_WSLLDI:
21222 case ARM_BUILTIN_WSRAHI:
21223 case ARM_BUILTIN_WSRAWI:
21224 case ARM_BUILTIN_WSRADI:
21225 case ARM_BUILTIN_WRORHI:
21226 case ARM_BUILTIN_WRORWI:
21227 case ARM_BUILTIN_WRORDI:
21228 case ARM_BUILTIN_WSRLH:
21229 case ARM_BUILTIN_WSRLW:
21230 case ARM_BUILTIN_WSRLD:
21231 case ARM_BUILTIN_WSLLH:
21232 case ARM_BUILTIN_WSLLW:
21233 case ARM_BUILTIN_WSLLD:
21234 case ARM_BUILTIN_WSRAH:
21235 case ARM_BUILTIN_WSRAW:
21236 case ARM_BUILTIN_WSRAD:
21237 case ARM_BUILTIN_WRORH:
21238 case ARM_BUILTIN_WRORW:
21239 case ARM_BUILTIN_WRORD:
21240 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
21241 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
21242 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
21243 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
21244 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
21245 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
21246 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
21247 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
21248 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
21249 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
21250 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
21251 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
21252 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
21253 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
21254 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
21255 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
21256 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
21257 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
21258 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
21259 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
21260 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
21261 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
21262 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
21263 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
21264 : CODE_FOR_nothing);
21265 arg1 = CALL_EXPR_ARG (exp, 1);
21266 op1 = expand_normal (arg1);
21267 if (GET_MODE (op1) == VOIDmode)
21268 {
21269 imm = INTVAL (op1);
21270 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
21271 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
21272 && (imm < 0 || imm > 32))
21273 {
21274 if (fcode == ARM_BUILTIN_WRORHI)
21275 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
21276 else if (fcode == ARM_BUILTIN_WRORWI)
21277 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
21278 else if (fcode == ARM_BUILTIN_WRORH)
21279 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
21280 else
21281 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
21282 }
21283 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
21284 && (imm < 0 || imm > 64))
21285 {
21286 if (fcode == ARM_BUILTIN_WRORDI)
21287 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
21288 else
21289 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
21290 }
21291 else if (imm < 0)
21292 {
21293 if (fcode == ARM_BUILTIN_WSRLHI)
21294 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
21295 else if (fcode == ARM_BUILTIN_WSRLWI)
21296 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
21297 else if (fcode == ARM_BUILTIN_WSRLDI)
21298 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
21299 else if (fcode == ARM_BUILTIN_WSLLHI)
21300 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
21301 else if (fcode == ARM_BUILTIN_WSLLWI)
21302 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
21303 else if (fcode == ARM_BUILTIN_WSLLDI)
21304 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
21305 else if (fcode == ARM_BUILTIN_WSRAHI)
21306 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
21307 else if (fcode == ARM_BUILTIN_WSRAWI)
21308 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
21309 else if (fcode == ARM_BUILTIN_WSRADI)
21310 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
21311 else if (fcode == ARM_BUILTIN_WSRLH)
21312 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
21313 else if (fcode == ARM_BUILTIN_WSRLW)
21314 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
21315 else if (fcode == ARM_BUILTIN_WSRLD)
21316 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
21317 else if (fcode == ARM_BUILTIN_WSLLH)
21318 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
21319 else if (fcode == ARM_BUILTIN_WSLLW)
21320 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
21321 else if (fcode == ARM_BUILTIN_WSLLD)
21322 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
21323 else if (fcode == ARM_BUILTIN_WSRAH)
21324 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
21325 else if (fcode == ARM_BUILTIN_WSRAW)
21326 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
21327 else
21328 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
21329 }
21330 }
21331 return arm_expand_binop_builtin (icode, exp, target);
21332
21333 case ARM_BUILTIN_THREAD_POINTER:
21334 return arm_load_tp (target);
21335
21336 default:
21337 break;
21338 }
21339
21340 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21341 if (d->code == (const enum arm_builtins) fcode)
21342 return arm_expand_binop_builtin (d->icode, exp, target);
21343
21344 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21345 if (d->code == (const enum arm_builtins) fcode)
21346 return arm_expand_unop_builtin (d->icode, exp, target, 0);
21347
21348 /* @@@ Should really do something sensible here. */
21349 return NULL_RTX;
21350 }
21351 \f
21352 /* Return the number (counting from 0) of
21353 the least significant set bit in MASK. */
21354
21355 inline static int
21356 number_of_first_bit_set (unsigned mask)
21357 {
21358 return ctz_hwi (mask);
21359 }
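
/* For example, number_of_first_bit_set (0x28) is 3, since
   0x28 == 0b101000 and bit 3 is the lowest bit set.  */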
21360
21361 /* Like emit_multi_reg_push, but allowing for a different set of
21362 registers to be described as saved. MASK is the set of registers
21363 to be saved; REAL_REGS is the set of registers to be described as
21364 saved. If REAL_REGS is 0, only describe the stack adjustment. */
21365
21366 static rtx
21367 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
21368 {
21369 unsigned long regno;
21370 rtx par[10], tmp, reg, insn;
21371 int i, j;
21372
21373 /* Build the parallel of the registers actually being stored. */
21374 for (i = 0; mask; ++i, mask &= mask - 1)
21375 {
21376 regno = ctz_hwi (mask);
21377 reg = gen_rtx_REG (SImode, regno);
21378
21379 if (i == 0)
21380 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
21381 else
21382 tmp = gen_rtx_USE (VOIDmode, reg);
21383
21384 par[i] = tmp;
21385 }
21386
21387 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
21388 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21389 tmp = gen_frame_mem (BLKmode, tmp);
21390 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
21391 par[0] = tmp;
21392
21393 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
21394 insn = emit_insn (tmp);
21395
21396 /* Always build the stack adjustment note for unwind info. */
21397 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
21398 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
21399 par[0] = tmp;
21400
21401 /* Build the parallel of the registers recorded as saved for unwind. */
21402 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
21403 {
21404 regno = ctz_hwi (real_regs);
21405 reg = gen_rtx_REG (SImode, regno);
21406
21407 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
21408 tmp = gen_frame_mem (SImode, tmp);
21409 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
21410 RTX_FRAME_RELATED_P (tmp) = 1;
21411 par[j + 1] = tmp;
21412 }
21413
21414 if (j == 0)
21415 tmp = par[0];
21416 else
21417 {
21418 RTX_FRAME_RELATED_P (par[0]) = 1;
21419 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
21420 }
21421
21422 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
21423
21424 return insn;
21425 }
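
/* A worked example of the function above: with
   MASK == (1 << 4) | (1 << 5) | (1 << LR_REGNUM) three registers are
   stored, so the emitted insn pre-decrements the stack pointer by 12
   (4 * i), and the attached REG_FRAME_RELATED_EXPR note records that
   adjustment followed by one frame-related store per register named in
   REAL_REGS, at offsets 0, 4 and 8 from the new stack pointer.  */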
21426
21427 /* Emit code to pop registers from the stack.  F is the
21428 assembly file.  MASK is the registers to pop. */
21429 static void
21430 thumb_pop (FILE *f, unsigned long mask)
21431 {
21432 int regno;
21433 int lo_mask = mask & 0xFF;
21434 int pushed_words = 0;
21435
21436 gcc_assert (mask);
21437
21438 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
21439 {
21440 /* Special case.  Do not generate a POP PC statement here; do it in
21441 thumb_exit ().  */
21442 thumb_exit (f, -1);
21443 return;
21444 }
21445
21446 fprintf (f, "\tpop\t{");
21447
21448 /* Look at the low registers first. */
21449 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
21450 {
21451 if (lo_mask & 1)
21452 {
21453 asm_fprintf (f, "%r", regno);
21454
21455 if ((lo_mask & ~1) != 0)
21456 fprintf (f, ", ");
21457
21458 pushed_words++;
21459 }
21460 }
21461
21462 if (mask & (1 << PC_REGNUM))
21463 {
21464 /* Catch popping the PC. */
21465 if (TARGET_INTERWORK || TARGET_BACKTRACE
21466 || crtl->calls_eh_return)
21467 {
21468 /* The PC is never popped directly; instead
21469 it is popped into r3 and then BX is used. */
21470 fprintf (f, "}\n");
21471
21472 thumb_exit (f, -1);
21473
21474 return;
21475 }
21476 else
21477 {
21478 if (mask & 0xFF)
21479 fprintf (f, ", ");
21480
21481 asm_fprintf (f, "%r", PC_REGNUM);
21482 }
21483 }
21484
21485 fprintf (f, "}\n");
21486 }
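
/* For example, thumb_pop (f, (1 << 4) | (1 << 5) | (1 << PC_REGNUM))
   prints "pop {r4, r5, pc}" when neither interworking, a backtrace
   structure nor __builtin_eh_return is in use; otherwise only the low
   registers are popped here and the return through the PC is left to
   thumb_exit.  */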
21487
21488 /* Generate code to return from a thumb function.
21489 If 'reg_containing_return_addr' is -1, then the return address is
21490 actually on the stack, at the stack pointer. */
21491 static void
21492 thumb_exit (FILE *f, int reg_containing_return_addr)
21493 {
21494 unsigned regs_available_for_popping;
21495 unsigned regs_to_pop;
21496 int pops_needed;
21497 unsigned available;
21498 unsigned required;
21499 int mode;
21500 int size;
21501 int restore_a4 = FALSE;
21502
21503 /* Compute the registers we need to pop. */
21504 regs_to_pop = 0;
21505 pops_needed = 0;
21506
21507 if (reg_containing_return_addr == -1)
21508 {
21509 regs_to_pop |= 1 << LR_REGNUM;
21510 ++pops_needed;
21511 }
21512
21513 if (TARGET_BACKTRACE)
21514 {
21515 /* Restore the (ARM) frame pointer and stack pointer. */
21516 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
21517 pops_needed += 2;
21518 }
21519
21520 /* If there is nothing to pop then just emit the BX instruction and
21521 return. */
21522 if (pops_needed == 0)
21523 {
21524 if (crtl->calls_eh_return)
21525 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21526
21527 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21528 return;
21529 }
21530 /* Otherwise, if we are not supporting interworking, have not created
21531 a backtrace structure, and the function was not entered in ARM mode,
21532 then just pop the return address straight into the PC. */
21533 else if (!TARGET_INTERWORK
21534 && !TARGET_BACKTRACE
21535 && !is_called_in_ARM_mode (current_function_decl)
21536 && !crtl->calls_eh_return)
21537 {
21538 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
21539 return;
21540 }
21541
21542 /* Find out how many of the (return) argument registers we can corrupt. */
21543 regs_available_for_popping = 0;
21544
21545 /* If returning via __builtin_eh_return, the bottom three registers
21546 all contain information needed for the return. */
21547 if (crtl->calls_eh_return)
21548 size = 12;
21549 else
21550 {
21551 /* We can deduce the registers used from the function's
21552 return value. This is more reliable than examining
21553 df_regs_ever_live_p () because that will be set if the register is
21554 ever used in the function, not just if the register is used
21555 to hold a return value. */
21556
21557 if (crtl->return_rtx != 0)
21558 mode = GET_MODE (crtl->return_rtx);
21559 else
21560 mode = DECL_MODE (DECL_RESULT (current_function_decl));
21561
21562 size = GET_MODE_SIZE (mode);
21563
21564 if (size == 0)
21565 {
21566 /* In a void function we can use any argument register.
21567 In a function that returns a structure on the stack
21568 we can use the second and third argument registers. */
21569 if (mode == VOIDmode)
21570 regs_available_for_popping =
21571 (1 << ARG_REGISTER (1))
21572 | (1 << ARG_REGISTER (2))
21573 | (1 << ARG_REGISTER (3));
21574 else
21575 regs_available_for_popping =
21576 (1 << ARG_REGISTER (2))
21577 | (1 << ARG_REGISTER (3));
21578 }
21579 else if (size <= 4)
21580 regs_available_for_popping =
21581 (1 << ARG_REGISTER (2))
21582 | (1 << ARG_REGISTER (3));
21583 else if (size <= 8)
21584 regs_available_for_popping =
21585 (1 << ARG_REGISTER (3));
21586 }
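/* For example (derived from the cases above): a void function leaves the
   first three argument registers available for popping into, while a
   function returning a 64-bit value in two registers leaves only the
   third argument register free.  */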
21587
21588 /* Match registers to be popped with registers into which we pop them. */
21589 for (available = regs_available_for_popping,
21590 required = regs_to_pop;
21591 required != 0 && available != 0;
21592 available &= ~(available & - available),
21593 required &= ~(required & - required))
21594 -- pops_needed;
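/* Each iteration above clears the lowest set bit of both masks
   (x & -x isolates that bit), so POPS_NEEDED is decremented once for
   each register to pop that can be matched with an available register.  */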
21595
21596 /* If we have any popping registers left over, remove them. */
21597 if (available > 0)
21598 regs_available_for_popping &= ~available;
21599
21600 /* Otherwise if we need another popping register we can use
21601 the fourth argument register. */
21602 else if (pops_needed)
21603 {
21604 /* If we have not found any free argument registers and
21605 reg a4 contains the return address, we must move it. */
21606 if (regs_available_for_popping == 0
21607 && reg_containing_return_addr == LAST_ARG_REGNUM)
21608 {
21609 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21610 reg_containing_return_addr = LR_REGNUM;
21611 }
21612 else if (size > 12)
21613 {
21614 /* Register a4 is being used to hold part of the return value,
21615 but we have dire need of a free, low register. */
21616 restore_a4 = TRUE;
21617
21618 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
21619 }
21620
21621 if (reg_containing_return_addr != LAST_ARG_REGNUM)
21622 {
21623 /* The fourth argument register is available. */
21624 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
21625
21626 --pops_needed;
21627 }
21628 }
21629
21630 /* Pop as many registers as we can. */
21631 thumb_pop (f, regs_available_for_popping);
21632
21633 /* Process the registers we popped. */
21634 if (reg_containing_return_addr == -1)
21635 {
21636 /* The return address was popped into the lowest numbered register. */
21637 regs_to_pop &= ~(1 << LR_REGNUM);
21638
21639 reg_containing_return_addr =
21640 number_of_first_bit_set (regs_available_for_popping);
21641
21642 /* Remove this register from the mask of available registers, so that
21643 the return address will not be corrupted by further pops. */
21644 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
21645 }
21646
21647 /* If we popped other registers then handle them here. */
21648 if (regs_available_for_popping)
21649 {
21650 int frame_pointer;
21651
21652 /* Work out which register currently contains the frame pointer. */
21653 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
21654
21655 /* Move it into the correct place. */
21656 asm_fprintf (f, "\tmov\t%r, %r\n",
21657 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
21658
21659 /* (Temporarily) remove it from the mask of popped registers. */
21660 regs_available_for_popping &= ~(1 << frame_pointer);
21661 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
21662
21663 if (regs_available_for_popping)
21664 {
21665 int stack_pointer;
21666
21667 /* We popped the stack pointer as well,
21668 find the register that contains it. */
21669 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
21670
21671 /* Move it into the stack register. */
21672 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
21673
21674 /* At this point we have popped all necessary registers, so
21675 do not worry about restoring regs_available_for_popping
21676 to its correct value:
21677
21678 assert (pops_needed == 0)
21679 assert (regs_available_for_popping == (1 << frame_pointer))
21680 assert (regs_to_pop == (1 << STACK_POINTER)) */
21681 }
21682 else
21683 {
21684 /* Since we have just moved the popped value into the frame
21685 pointer, the popping register is available for reuse, and
21686 we know that we still have the stack pointer left to pop. */
21687 regs_available_for_popping |= (1 << frame_pointer);
21688 }
21689 }
21690
21691 /* If we still have registers left on the stack, but we no longer have
21692 any registers into which we can pop them, then we must move the return
21693 address into the link register and make available the register that
21694 contained it. */
21695 if (regs_available_for_popping == 0 && pops_needed > 0)
21696 {
21697 regs_available_for_popping |= 1 << reg_containing_return_addr;
21698
21699 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
21700 reg_containing_return_addr);
21701
21702 reg_containing_return_addr = LR_REGNUM;
21703 }
21704
21705 /* If we have registers left on the stack then pop some more.
21706 We know that at most we will want to pop FP and SP. */
21707 if (pops_needed > 0)
21708 {
21709 int popped_into;
21710 int move_to;
21711
21712 thumb_pop (f, regs_available_for_popping);
21713
21714 /* We have popped either FP or SP.
21715 Move whichever one it is into the correct register. */
21716 popped_into = number_of_first_bit_set (regs_available_for_popping);
21717 move_to = number_of_first_bit_set (regs_to_pop);
21718
21719 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
21720
21721 regs_to_pop &= ~(1 << move_to);
21722
21723 --pops_needed;
21724 }
21725
21726 /* If we still have not popped everything then we must have only
21727 had one register available to us and we are now popping the SP. */
21728 if (pops_needed > 0)
21729 {
21730 int popped_into;
21731
21732 thumb_pop (f, regs_available_for_popping);
21733
21734 popped_into = number_of_first_bit_set (regs_available_for_popping);
21735
21736 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
21737 /*
21738 assert (regs_to_pop == (1 << STACK_POINTER))
21739 assert (pops_needed == 1)
21740 */
21741 }
21742
21743 /* If necessary restore the a4 register. */
21744 if (restore_a4)
21745 {
21746 if (reg_containing_return_addr != LR_REGNUM)
21747 {
21748 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21749 reg_containing_return_addr = LR_REGNUM;
21750 }
21751
21752 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
21753 }
21754
21755 if (crtl->calls_eh_return)
21756 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21757
21758 /* Return to caller. */
21759 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21760 }
21761 \f
21762 /* Scan INSN just before assembler is output for it.
21763 For Thumb-1, we track the status of the condition codes; this
21764 information is used in the cbranchsi4_insn pattern. */
21765 void
21766 thumb1_final_prescan_insn (rtx insn)
21767 {
21768 if (flag_print_asm_name)
21769 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
21770 INSN_ADDRESSES (INSN_UID (insn)));
21771 /* Don't overwrite the previous setter when we get to a cbranch. */
21772 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
21773 {
21774 enum attr_conds conds;
21775
21776 if (cfun->machine->thumb1_cc_insn)
21777 {
21778 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
21779 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
21780 CC_STATUS_INIT;
21781 }
21782 conds = get_attr_conds (insn);
21783 if (conds == CONDS_SET)
21784 {
21785 rtx set = single_set (insn);
21786 cfun->machine->thumb1_cc_insn = insn;
21787 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
21788 cfun->machine->thumb1_cc_op1 = const0_rtx;
21789 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
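	  /* By default only the N and Z flags are known to be valid
	     (CC_NOOVmode); a subtract of the constant zero sets the flags
	     exactly as a compare against zero would, so full CCmode can be
	     recorded for that case below.  */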
21790 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
21791 {
21792 rtx src1 = XEXP (SET_SRC (set), 1);
21793 if (src1 == const0_rtx)
21794 cfun->machine->thumb1_cc_mode = CCmode;
21795 }
21796 }
21797 else if (conds != CONDS_NOCOND)
21798 cfun->machine->thumb1_cc_insn = NULL_RTX;
21799 }
21800 }
21801
21802 int
21803 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
21804 {
21805 unsigned HOST_WIDE_INT mask = 0xff;
21806 int i;
21807
21808 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
21809 if (val == 0) /* XXX */
21810 return 0;
21811
21812 for (i = 0; i < 25; i++)
21813 if ((val & (mask << i)) == val)
21814 return 1;
21815
21816 return 0;
21817 }
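/* Illustrative examples: 0x000ff000 is 0xff shifted left by 12, so the
   loop above finds a match and the function returns 1; 0x00100100 does
   not fit within any single shifted 8-bit window, so the function
   returns 0.  */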
21818
21819 /* Returns nonzero if the current function contains,
21820 or might contain, a far jump. */
21821 static int
21822 thumb_far_jump_used_p (void)
21823 {
21824 rtx insn;
21825
21826 /* This test is only important for leaf functions. */
21827 /* assert (!leaf_function_p ()); */
21828
21829 /* If we have already decided that far jumps may be used,
21830 do not bother checking again, and always return true even if
21831 it turns out that they are not being used. Once we have made
21832 the decision that far jumps are present (and that hence the link
21833 register will be pushed onto the stack) we cannot go back on it. */
21834 if (cfun->machine->far_jump_used)
21835 return 1;
21836
21837 /* If this function is not being called from the prologue/epilogue
21838 generation code then it must be being called from the
21839 INITIAL_ELIMINATION_OFFSET macro. */
21840 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
21841 {
21842 /* In this case we know that we are being asked about the elimination
21843 of the arg pointer register. If that register is not being used,
21844 then there are no arguments on the stack, and we do not have to
21845 worry that a far jump might force the prologue to push the link
21846 register, changing the stack offsets. In this case we can just
21847 return false, since the presence of far jumps in the function will
21848 not affect stack offsets.
21849
21850 If the arg pointer is live (or if it was live, but has now been
21851 eliminated and so set to dead) then we do have to test to see if
21852 the function might contain a far jump. This test can lead to some
21853 false negatives, since before reload is completed the length of
21854 branch instructions is not known, so gcc defaults to returning their
21855 longest length, which in turn sets the far jump attribute to true.
21856
21857 A false negative will not result in bad code being generated, but it
21858 will result in a needless push and pop of the link register. We
21859 hope that this does not occur too often.
21860
21861 If we need doubleword stack alignment this could affect the other
21862 elimination offsets so we can't risk getting it wrong. */
21863 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
21864 cfun->machine->arg_pointer_live = 1;
21865 else if (!cfun->machine->arg_pointer_live)
21866 return 0;
21867 }
21868
21869 /* Check to see if the function contains a branch
21870 insn with the far jump attribute set. */
21871 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21872 {
21873 if (JUMP_P (insn)
21874 /* Ignore tablejump patterns. */
21875 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21876 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
21877 && get_attr_far_jump (insn) == FAR_JUMP_YES
21878 )
21879 {
21880 /* Record the fact that we have decided that
21881 the function does use far jumps. */
21882 cfun->machine->far_jump_used = 1;
21883 return 1;
21884 }
21885 }
21886
21887 return 0;
21888 }
21889
21890 /* Return nonzero if FUNC must be entered in ARM mode. */
21891 int
21892 is_called_in_ARM_mode (tree func)
21893 {
21894 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
21895
21896 /* Ignore the problem about functions whose address is taken. */
21897 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
21898 return TRUE;
21899
21900 #ifdef ARM_PE
21901 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
21902 #else
21903 return FALSE;
21904 #endif
21905 }
21906
21907 /* Given the stack offsets and register mask in OFFSETS, decide how
21908 many additional registers to push instead of subtracting a constant
21909 from SP. For epilogues the principle is the same except we use pop.
21910 FOR_PROLOGUE indicates which we're generating. */
21911 static int
21912 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
21913 {
21914 HOST_WIDE_INT amount;
21915 unsigned long live_regs_mask = offsets->saved_regs_mask;
21916 /* Extract a mask of the ones we can give to the Thumb's push/pop
21917 instruction. */
21918 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
21919 /* Then count how many other high registers will need to be pushed. */
21920 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21921 int n_free, reg_base, size;
21922
21923 if (!for_prologue && frame_pointer_needed)
21924 amount = offsets->locals_base - offsets->saved_regs;
21925 else
21926 amount = offsets->outgoing_args - offsets->saved_regs;
21927
21928 /* If the stack frame size is 512 exactly, we can save one load
21929 instruction, which should make this a win even when optimizing
21930 for speed. */
21931 if (!optimize_size && amount != 512)
21932 return 0;
21933
21934 /* Can't do this if there are high registers to push. */
21935 if (high_regs_pushed != 0)
21936 return 0;
21937
21938 /* Shouldn't do it in the prologue if no registers would normally
21939 be pushed at all. In the epilogue, also allow it if we'll have
21940 a pop insn for the PC. */
21941 if (l_mask == 0
21942 && (for_prologue
21943 || TARGET_BACKTRACE
21944 || (live_regs_mask & 1 << LR_REGNUM) == 0
21945 || TARGET_INTERWORK
21946 || crtl->args.pretend_args_size != 0))
21947 return 0;
21948
21949 /* Don't do this if thumb_expand_prologue wants to emit instructions
21950 between the push and the stack frame allocation. */
21951 if (for_prologue
21952 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
21953 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
21954 return 0;
21955
21956 reg_base = 0;
21957 n_free = 0;
21958 if (!for_prologue)
21959 {
21960 size = arm_size_return_regs ();
21961 reg_base = ARM_NUM_INTS (size);
21962 live_regs_mask >>= reg_base;
21963 }
21964
21965 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
21966 && (for_prologue || call_used_regs[reg_base + n_free]))
21967 {
21968 live_regs_mask >>= 1;
21969 n_free++;
21970 }
21971
21972 if (n_free == 0)
21973 return 0;
21974 gcc_assert (amount / 4 * 4 == amount);
21975
21976 if (amount >= 512 && (amount - n_free * 4) < 512)
21977 return (amount - 508) / 4;
21978 if (amount <= n_free * 4)
21979 return amount / 4;
21980 return 0;
21981 }
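/* A worked example: with AMOUNT == 516 and N_FREE == 2, the test
   (amount - n_free * 4) < 512 holds, so two extra registers are pushed
   ((516 - 508) / 4 == 2) and the remaining adjustment of 508 bytes fits
   in a single Thumb-1 SP-adjusting instruction.  */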
21982
21983 /* The bits which aren't usefully expanded as rtl. */
21984 const char *
21985 thumb1_unexpanded_epilogue (void)
21986 {
21987 arm_stack_offsets *offsets;
21988 int regno;
21989 unsigned long live_regs_mask = 0;
21990 int high_regs_pushed = 0;
21991 int extra_pop;
21992 int had_to_push_lr;
21993 int size;
21994
21995 if (cfun->machine->return_used_this_function != 0)
21996 return "";
21997
21998 if (IS_NAKED (arm_current_func_type ()))
21999 return "";
22000
22001 offsets = arm_get_frame_offsets ();
22002 live_regs_mask = offsets->saved_regs_mask;
22003 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
22004
22005 /* We can deduce the registers used from the function's return value.
22006 This is more reliable than examining df_regs_ever_live_p () because that
22007 will be set if the register is ever used in the function, not just if
22008 the register is used to hold a return value. */
22009 size = arm_size_return_regs ();
22010
22011 extra_pop = thumb1_extra_regs_pushed (offsets, false);
22012 if (extra_pop > 0)
22013 {
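      /* The extra registers popped to perform the stack adjustment start
	 just above the registers that hold the return value.  */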
22014 unsigned long extra_mask = (1 << extra_pop) - 1;
22015 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
22016 }
22017
22018 /* The prologue may have pushed some high registers to use as
22019 work registers. E.g. the testsuite file:
22020 gcc/testsuite/gcc.c-torture/execute/complex-2.c
22021 compiles to produce:
22022 push {r4, r5, r6, r7, lr}
22023 mov r7, r9
22024 mov r6, r8
22025 push {r6, r7}
22026 as part of the prologue. We have to undo that pushing here. */
22027
22028 if (high_regs_pushed)
22029 {
22030 unsigned long mask = live_regs_mask & 0xff;
22031 int next_hi_reg;
22032
22033 /* The available low registers depend on the size of the value we are
22034 returning. */
22035 if (size <= 12)
22036 mask |= 1 << 3;
22037 if (size <= 8)
22038 mask |= 1 << 2;
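	  /* E.g. a function returning a 4-byte value in r0 leaves both r2
	     and r3 free, so they join the pool of low registers used to
	     stage the high-register pops below.  */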
22039
22040 if (mask == 0)
22041 /* Oh dear! We have no low registers into which we can pop
22042 high registers! */
22043 internal_error
22044 ("no low registers available for popping high registers");
22045
22046 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
22047 if (live_regs_mask & (1 << next_hi_reg))
22048 break;
22049
22050 while (high_regs_pushed)
22051 {
22052 /* Find lo register(s) into which the high register(s) can
22053 be popped. */
22054 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
22055 {
22056 if (mask & (1 << regno))
22057 high_regs_pushed--;
22058 if (high_regs_pushed == 0)
22059 break;
22060 }
22061
22062 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
22063
22064 /* Pop the values into the low register(s). */
22065 thumb_pop (asm_out_file, mask);
22066
22067 /* Move the value(s) into the high registers. */
22068 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
22069 {
22070 if (mask & (1 << regno))
22071 {
22072 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
22073 regno);
22074
22075 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
22076 if (live_regs_mask & (1 << next_hi_reg))
22077 break;
22078 }
22079 }
22080 }
22081 live_regs_mask &= ~0x0f00;
22082 }
22083
22084 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
22085 live_regs_mask &= 0xff;
22086
22087 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
22088 {
22089 /* Pop the return address into the PC. */
22090 if (had_to_push_lr)
22091 live_regs_mask |= 1 << PC_REGNUM;
22092
22093 /* Either no argument registers were pushed or a backtrace
22094 structure was created which includes an adjusted stack
22095 pointer, so just pop everything. */
22096 if (live_regs_mask)
22097 thumb_pop (asm_out_file, live_regs_mask);
22098
22099 /* We have either just popped the return address into the
22100 PC or it was kept in LR for the entire function.
22101 Note that thumb_pop has already called thumb_exit if the
22102 PC was in the list. */
22103 if (!had_to_push_lr)
22104 thumb_exit (asm_out_file, LR_REGNUM);
22105 }
22106 else
22107 {
22108 /* Pop everything but the return address. */
22109 if (live_regs_mask)
22110 thumb_pop (asm_out_file, live_regs_mask);
22111
22112 if (had_to_push_lr)
22113 {
22114 if (size > 12)
22115 {
22116 /* We have no free low regs, so save one. */
22117 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
22118 LAST_ARG_REGNUM);
22119 }
22120
22121 /* Get the return address into a temporary register. */
22122 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
22123
22124 if (size > 12)
22125 {
22126 /* Move the return address to lr. */
22127 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
22128 LAST_ARG_REGNUM);
22129 /* Restore the low register. */
22130 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
22131 IP_REGNUM);
22132 regno = LR_REGNUM;
22133 }
22134 else
22135 regno = LAST_ARG_REGNUM;
22136 }
22137 else
22138 regno = LR_REGNUM;
22139
22140 /* Remove the argument registers that were pushed onto the stack. */
22141 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
22142 SP_REGNUM, SP_REGNUM,
22143 crtl->args.pretend_args_size);
22144
22145 thumb_exit (asm_out_file, regno);
22146 }
22147
22148 return "";
22149 }
22150
22151 /* Functions to save and restore machine-specific function data. */
22152 static struct machine_function *
22153 arm_init_machine_status (void)
22154 {
22155 struct machine_function *machine;
22156 machine = ggc_alloc_cleared_machine_function ();
22157
22158 #if ARM_FT_UNKNOWN != 0
22159 machine->func_type = ARM_FT_UNKNOWN;
22160 #endif
22161 return machine;
22162 }
22163
22164 /* Return an RTX indicating where the return address to the
22165 calling function can be found. */
22166 rtx
22167 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
22168 {
22169 if (count != 0)
22170 return NULL_RTX;
22171
22172 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
22173 }
22174
22175 /* Do anything needed before RTL is emitted for each function. */
22176 void
22177 arm_init_expanders (void)
22178 {
22179 /* Arrange to initialize and mark the machine per-function status. */
22180 init_machine_status = arm_init_machine_status;
22181
22182 /* This is to stop the combine pass optimizing away the alignment
22183 adjustment of va_arg. */
22184 /* ??? It is claimed that this should not be necessary. */
22185 if (cfun)
22186 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
22187 }
22188
22189
22190 /* Like arm_compute_initial_elimination_offset. Simpler because there
22191 isn't an ABI specified frame pointer for Thumb. Instead, we set it
22192 to point at the base of the local variables after static stack
22193 space for a function has been allocated. */
22194
22195 HOST_WIDE_INT
22196 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
22197 {
22198 arm_stack_offsets *offsets;
22199
22200 offsets = arm_get_frame_offsets ();
22201
22202 switch (from)
22203 {
22204 case ARG_POINTER_REGNUM:
22205 switch (to)
22206 {
22207 case STACK_POINTER_REGNUM:
22208 return offsets->outgoing_args - offsets->saved_args;
22209
22210 case FRAME_POINTER_REGNUM:
22211 return offsets->soft_frame - offsets->saved_args;
22212
22213 case ARM_HARD_FRAME_POINTER_REGNUM:
22214 return offsets->saved_regs - offsets->saved_args;
22215
22216 case THUMB_HARD_FRAME_POINTER_REGNUM:
22217 return offsets->locals_base - offsets->saved_args;
22218
22219 default:
22220 gcc_unreachable ();
22221 }
22222 break;
22223
22224 case FRAME_POINTER_REGNUM:
22225 switch (to)
22226 {
22227 case STACK_POINTER_REGNUM:
22228 return offsets->outgoing_args - offsets->soft_frame;
22229
22230 case ARM_HARD_FRAME_POINTER_REGNUM:
22231 return offsets->saved_regs - offsets->soft_frame;
22232
22233 case THUMB_HARD_FRAME_POINTER_REGNUM:
22234 return offsets->locals_base - offsets->soft_frame;
22235
22236 default:
22237 gcc_unreachable ();
22238 }
22239 break;
22240
22241 default:
22242 gcc_unreachable ();
22243 }
22244 }
22245
22246 /* Generate the function's prologue. */
22247
22248 void
22249 thumb1_expand_prologue (void)
22250 {
22251 rtx insn;
22252
22253 HOST_WIDE_INT amount;
22254 arm_stack_offsets *offsets;
22255 unsigned long func_type;
22256 int regno;
22257 unsigned long live_regs_mask;
22258 unsigned long l_mask;
22259 unsigned high_regs_pushed = 0;
22260
22261 func_type = arm_current_func_type ();
22262
22263 /* Naked functions don't have prologues. */
22264 if (IS_NAKED (func_type))
22265 return;
22266
22267 if (IS_INTERRUPT (func_type))
22268 {
22269 error ("interrupt Service Routines cannot be coded in Thumb mode");
22270 return;
22271 }
22272
22273 if (is_called_in_ARM_mode (current_function_decl))
22274 emit_insn (gen_prologue_thumb1_interwork ());
22275
22276 offsets = arm_get_frame_offsets ();
22277 live_regs_mask = offsets->saved_regs_mask;
22278
22279 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
22280 l_mask = live_regs_mask & 0x40ff;
22281 /* Then count how many other high registers will need to be pushed. */
22282 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
22283
22284 if (crtl->args.pretend_args_size)
22285 {
22286 rtx x = GEN_INT (-crtl->args.pretend_args_size);
22287
22288 if (cfun->machine->uses_anonymous_args)
22289 {
22290 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
22291 unsigned long mask;
22292
22293 mask = 1ul << (LAST_ARG_REGNUM + 1);
22294 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
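	  /* E.g. with LAST_ARG_REGNUM == 3 and num_pushes == 2 this yields
	     (1 << 4) - (1 << 2) == 0xc, i.e. the mask for r2 and r3.  */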
22295
22296 insn = thumb1_emit_multi_reg_push (mask, 0);
22297 }
22298 else
22299 {
22300 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22301 stack_pointer_rtx, x));
22302 }
22303 RTX_FRAME_RELATED_P (insn) = 1;
22304 }
22305
22306 if (TARGET_BACKTRACE)
22307 {
22308 HOST_WIDE_INT offset = 0;
22309 unsigned work_register;
22310 rtx work_reg, x, arm_hfp_rtx;
22311
22312 /* We have been asked to create a stack backtrace structure.
22313 The code looks like this:
22314
22315 0 .align 2
22316 0 func:
22317 0 sub SP, #16 Reserve space for 4 registers.
22318 2 push {R7} Push low registers.
22319 4 add R7, SP, #20 Get the stack pointer before the push.
22320 6 str R7, [SP, #8] Store the stack pointer
22321 (before reserving the space).
22322 8 mov R7, PC Get hold of the start of this code + 12.
22323 10 str R7, [SP, #16] Store it.
22324 12 mov R7, FP Get hold of the current frame pointer.
22325 14 str R7, [SP, #4] Store it.
22326 16 mov R7, LR Get hold of the current return address.
22327 18 str R7, [SP, #12] Store it.
22328 20 add R7, SP, #16 Point at the start of the
22329 backtrace structure.
22330 22 mov FP, R7 Put this value into the frame pointer. */
22331
22332 work_register = thumb_find_work_register (live_regs_mask);
22333 work_reg = gen_rtx_REG (SImode, work_register);
22334 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
22335
22336 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22337 stack_pointer_rtx, GEN_INT (-16)));
22338 RTX_FRAME_RELATED_P (insn) = 1;
22339
22340 if (l_mask)
22341 {
22342 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
22343 RTX_FRAME_RELATED_P (insn) = 1;
22344
22345 offset = bit_count (l_mask) * UNITS_PER_WORD;
22346 }
22347
22348 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
22349 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22350
22351 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
22352 x = gen_frame_mem (SImode, x);
22353 emit_move_insn (x, work_reg);
22354
22355 /* Make sure that the instruction fetching the PC is in the right place
22356 to calculate "start of backtrace creation code + 12". */
22357 /* ??? The stores using the common WORK_REG ought to be enough to
22358 prevent the scheduler from doing anything weird. Failing that
22359 we could always move all of the following into an UNSPEC_VOLATILE. */
22360 if (l_mask)
22361 {
22362 x = gen_rtx_REG (SImode, PC_REGNUM);
22363 emit_move_insn (work_reg, x);
22364
22365 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
22366 x = gen_frame_mem (SImode, x);
22367 emit_move_insn (x, work_reg);
22368
22369 emit_move_insn (work_reg, arm_hfp_rtx);
22370
22371 x = plus_constant (Pmode, stack_pointer_rtx, offset);
22372 x = gen_frame_mem (SImode, x);
22373 emit_move_insn (x, work_reg);
22374 }
22375 else
22376 {
22377 emit_move_insn (work_reg, arm_hfp_rtx);
22378
22379 x = plus_constant (Pmode, stack_pointer_rtx, offset);
22380 x = gen_frame_mem (SImode, x);
22381 emit_move_insn (x, work_reg);
22382
22383 x = gen_rtx_REG (SImode, PC_REGNUM);
22384 emit_move_insn (work_reg, x);
22385
22386 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
22387 x = gen_frame_mem (SImode, x);
22388 emit_move_insn (x, work_reg);
22389 }
22390
22391 x = gen_rtx_REG (SImode, LR_REGNUM);
22392 emit_move_insn (work_reg, x);
22393
22394 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
22395 x = gen_frame_mem (SImode, x);
22396 emit_move_insn (x, work_reg);
22397
22398 x = GEN_INT (offset + 12);
22399 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22400
22401 emit_move_insn (arm_hfp_rtx, work_reg);
22402 }
22403 /* Optimization: If we are not pushing any low registers but we are going
22404 to push some high registers then delay our first push. This will just
22405 be a push of LR and we can combine it with the push of the first high
22406 register. */
22407 else if ((l_mask & 0xff) != 0
22408 || (high_regs_pushed == 0 && l_mask))
22409 {
22410 unsigned long mask = l_mask;
22411 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
22412 insn = thumb1_emit_multi_reg_push (mask, mask);
22413 RTX_FRAME_RELATED_P (insn) = 1;
22414 }
22415
22416 if (high_regs_pushed)
22417 {
22418 unsigned pushable_regs;
22419 unsigned next_hi_reg;
22420
22421 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
22422 if (live_regs_mask & (1 << next_hi_reg))
22423 break;
22424
22425 pushable_regs = l_mask & 0xff;
22426
22427 if (pushable_regs == 0)
22428 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
22429
22430 while (high_regs_pushed > 0)
22431 {
22432 unsigned long real_regs_mask = 0;
22433
22434 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
22435 {
22436 if (pushable_regs & (1 << regno))
22437 {
22438 emit_move_insn (gen_rtx_REG (SImode, regno),
22439 gen_rtx_REG (SImode, next_hi_reg));
22440
22441 high_regs_pushed --;
22442 real_regs_mask |= (1 << next_hi_reg);
22443
22444 if (high_regs_pushed)
22445 {
22446 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
22447 next_hi_reg --)
22448 if (live_regs_mask & (1 << next_hi_reg))
22449 break;
22450 }
22451 else
22452 {
22453 pushable_regs &= ~((1 << regno) - 1);
22454 break;
22455 }
22456 }
22457 }
22458
22459 /* If we had to find a work register and we have not yet
22460 saved the LR then add it to the list of regs to push. */
22461 if (l_mask == (1 << LR_REGNUM))
22462 {
22463 pushable_regs |= l_mask;
22464 real_regs_mask |= l_mask;
22465 l_mask = 0;
22466 }
22467
22468 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
22469 RTX_FRAME_RELATED_P (insn) = 1;
22470 }
22471 }
22472
22473 /* Load the pic register before setting the frame pointer,
22474 so we can use r7 as a temporary work register. */
22475 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22476 arm_load_pic_register (live_regs_mask);
22477
22478 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
22479 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
22480 stack_pointer_rtx);
22481
22482 if (flag_stack_usage_info)
22483 current_function_static_stack_size
22484 = offsets->outgoing_args - offsets->saved_args;
22485
22486 amount = offsets->outgoing_args - offsets->saved_regs;
22487 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
22488 if (amount)
22489 {
22490 if (amount < 512)
22491 {
22492 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22493 GEN_INT (- amount)));
22494 RTX_FRAME_RELATED_P (insn) = 1;
22495 }
22496 else
22497 {
22498 rtx reg, dwarf;
22499
22500 /* The stack decrement is too big for an immediate value in a single
22501 insn. In theory we could issue multiple subtracts, but after
22502 three of them it becomes more space efficient to place the full
22503 value in the constant pool and load into a register. (Also the
22504 ARM debugger really likes to see only one stack decrement per
22505 function). So instead we look for a scratch register into which
22506 we can load the decrement, and then we subtract this from the
22507 stack pointer. Unfortunately on the thumb the only available
22508 scratch registers are the argument registers, and we cannot use
22509 these as they may hold arguments to the function. Instead we
22510 attempt to locate a call preserved register which is used by this
22511 function. If we can find one, then we know that it will have
22512 been pushed at the start of the prologue and so we can corrupt
22513 it now. */
22514 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
22515 if (live_regs_mask & (1 << regno))
22516 break;
22517
22518 gcc_assert (regno <= LAST_LO_REGNUM);
22519
22520 reg = gen_rtx_REG (SImode, regno);
22521
22522 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
22523
22524 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22525 stack_pointer_rtx, reg));
22526
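	  /* The insn above adds a register to SP, which the unwinder cannot
	     interpret directly, so attach a REG_FRAME_RELATED_EXPR note
	     recording the adjustment as a constant.  */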
22527 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
22528 plus_constant (Pmode, stack_pointer_rtx,
22529 -amount));
22530 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22531 RTX_FRAME_RELATED_P (insn) = 1;
22532 }
22533 }
22534
22535 if (frame_pointer_needed)
22536 thumb_set_frame_pointer (offsets);
22537
22538 /* If we are profiling, make sure no instructions are scheduled before
22539 the call to mcount. Similarly if the user has requested no
22540 scheduling in the prologue. Similarly if we want non-call exceptions
22541 using the EABI unwinder, to prevent faulting instructions from being
22542 swapped with a stack adjustment. */
22543 if (crtl->profile || !TARGET_SCHED_PROLOG
22544 || (arm_except_unwind_info (&global_options) == UI_TARGET
22545 && cfun->can_throw_non_call_exceptions))
22546 emit_insn (gen_blockage ());
22547
22548 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
22549 if (live_regs_mask & 0xff)
22550 cfun->machine->lr_save_eliminated = 0;
22551 }
22552
22553 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
22554 single POP instruction can be generated. LR should be replaced by PC. All
22555 the checks required are already done by USE_RETURN_INSN (). Hence, all we
22556 really need to check here is whether a single register or multiple
22557 registers need to be popped. */
22558 void
22559 thumb2_expand_return (void)
22560 {
22561 int i, num_regs;
22562 unsigned long saved_regs_mask;
22563 arm_stack_offsets *offsets;
22564
22565 offsets = arm_get_frame_offsets ();
22566 saved_regs_mask = offsets->saved_regs_mask;
22567
22568 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
22569 if (saved_regs_mask & (1 << i))
22570 num_regs++;
22571
22572 if (saved_regs_mask)
22573 {
22574 if (num_regs == 1)
22575 {
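	  /* The single saved register is expected to be LR (see the function
	     comment); pop it straight into the PC, representing the combined
	     pop-and-return as a PARALLEL of a return and a post-increment
	     load.  */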
22576 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22577 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
22578 rtx addr = gen_rtx_MEM (SImode,
22579 gen_rtx_POST_INC (SImode,
22580 stack_pointer_rtx));
22581 set_mem_alias_set (addr, get_frame_alias_set ());
22582 XVECEXP (par, 0, 0) = ret_rtx;
22583 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
22584 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
22585 emit_jump_insn (par);
22586 }
22587 else
22588 {
22589 saved_regs_mask &= ~ (1 << LR_REGNUM);
22590 saved_regs_mask |= (1 << PC_REGNUM);
22591 arm_emit_multi_reg_pop (saved_regs_mask);
22592 }
22593 }
22594 else
22595 {
22596 emit_jump_insn (simple_return_rtx);
22597 }
22598 }
22599
22600 void
22601 thumb1_expand_epilogue (void)
22602 {
22603 HOST_WIDE_INT amount;
22604 arm_stack_offsets *offsets;
22605 int regno;
22606
22607 /* Naked functions don't have epilogues. */
22608 if (IS_NAKED (arm_current_func_type ()))
22609 return;
22610
22611 offsets = arm_get_frame_offsets ();
22612 amount = offsets->outgoing_args - offsets->saved_regs;
22613
22614 if (frame_pointer_needed)
22615 {
22616 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
22617 amount = offsets->locals_base - offsets->saved_regs;
22618 }
22619 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
22620
22621 gcc_assert (amount >= 0);
22622 if (amount)
22623 {
22624 emit_insn (gen_blockage ());
22625
22626 if (amount < 512)
22627 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22628 GEN_INT (amount)));
22629 else
22630 {
22631 /* r3 is always free in the epilogue. */
22632 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
22633
22634 emit_insn (gen_movsi (reg, GEN_INT (amount)));
22635 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
22636 }
22637 }
22638
22639 /* Emit a USE (stack_pointer_rtx), so that
22640 the stack adjustment will not be deleted. */
22641 emit_insn (gen_prologue_use (stack_pointer_rtx));
22642
22643 if (crtl->profile || !TARGET_SCHED_PROLOG)
22644 emit_insn (gen_blockage ());
22645
22646 /* Emit a clobber for each insn that will be restored in the epilogue,
22647 so that flow2 will get register lifetimes correct. */
22648 for (regno = 0; regno < 13; regno++)
22649 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
22650 emit_clobber (gen_rtx_REG (SImode, regno));
22651
22652 if (! df_regs_ever_live_p (LR_REGNUM))
22653 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
22654 }
22655
22656 /* Epilogue code for APCS frame. */
22657 static void
22658 arm_expand_epilogue_apcs_frame (bool really_return)
22659 {
22660 unsigned long func_type;
22661 unsigned long saved_regs_mask;
22662 int num_regs = 0;
22663 int i;
22664 int floats_from_frame = 0;
22665 arm_stack_offsets *offsets;
22666
22667 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
22668 func_type = arm_current_func_type ();
22669
22670 /* Get frame offsets for ARM. */
22671 offsets = arm_get_frame_offsets ();
22672 saved_regs_mask = offsets->saved_regs_mask;
22673
22674 /* Find the offset of the floating-point save area in the frame. */
22675 floats_from_frame = offsets->saved_args - offsets->frame;
22676
22677 /* Compute how many core registers are saved and how far away the floats are. */
22678 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22679 if (saved_regs_mask & (1 << i))
22680 {
22681 num_regs++;
22682 floats_from_frame += 4;
22683 }
22684
22685 if (TARGET_HARD_FLOAT && TARGET_VFP)
22686 {
22687 int start_reg;
22688
22689 /* The offset is from IP_REGNUM. */
22690 int saved_size = arm_get_vfp_saved_size ();
22691 if (saved_size > 0)
22692 {
22693 floats_from_frame += saved_size;
22694 emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM),
22695 hard_frame_pointer_rtx,
22696 GEN_INT (-floats_from_frame)));
22697 }
22698
22699 /* Generate VFP register multi-pop. */
22700 start_reg = FIRST_VFP_REGNUM;
22701
22702 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
22703 /* Look for a case where a reg does not need restoring. */
22704 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
22705 && (!df_regs_ever_live_p (i + 1)
22706 || call_used_regs[i + 1]))
22707 {
22708 if (start_reg != i)
22709 arm_emit_vfp_multi_reg_pop (start_reg,
22710 (i - start_reg) / 2,
22711 gen_rtx_REG (SImode,
22712 IP_REGNUM));
22713 start_reg = i + 2;
22714 }
22715
22716 /* Restore the remaining regs that we have discovered (or possibly
22717 even all of them, if the conditional in the for loop never
22718 fired). */
22719 if (start_reg != i)
22720 arm_emit_vfp_multi_reg_pop (start_reg,
22721 (i - start_reg) / 2,
22722 gen_rtx_REG (SImode, IP_REGNUM));
22723 }
22724
22725 if (TARGET_IWMMXT)
22726 {
22727 /* The frame pointer is guaranteed to be non-double-word aligned, as
22728 it is set to double-word-aligned old_stack_pointer - 4. */
22729 rtx insn;
22730 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
22731
22732 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
22733 if (df_regs_ever_live_p (i) && !call_used_regs[i])
22734 {
22735 rtx addr = gen_frame_mem (V2SImode,
22736 plus_constant (Pmode, hard_frame_pointer_rtx,
22737 - lrm_count * 4));
22738 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
22739 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22740 gen_rtx_REG (V2SImode, i),
22741 NULL_RTX);
22742 lrm_count += 2;
22743 }
22744 }
22745
22746 /* saved_regs_mask should contain IP, which holds the old stack pointer
22747 saved at activation creation. Since SP and IP are adjacent registers, we
22748 can restore that value directly into SP by replacing IP with SP in the mask. */
22749 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
22750 saved_regs_mask &= ~(1 << IP_REGNUM);
22751 saved_regs_mask |= (1 << SP_REGNUM);
22752
22753 /* There are two registers left in saved_regs_mask - LR and PC. We
22754 only need to restore LR (the return address), but to
22755 save time we can load it directly into PC, unless we need a
22756 special function exit sequence, or we are not really returning. */
22757 if (really_return
22758 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
22759 && !crtl->calls_eh_return)
22760 /* Delete LR from the register mask, so that LR on
22761 the stack is loaded into the PC in the register mask. */
22762 saved_regs_mask &= ~(1 << LR_REGNUM);
22763 else
22764 saved_regs_mask &= ~(1 << PC_REGNUM);
22765
22766 num_regs = bit_count (saved_regs_mask);
22767 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
22768 {
22769 /* Unwind the stack to just below the saved registers. */
22770 emit_insn (gen_addsi3 (stack_pointer_rtx,
22771 hard_frame_pointer_rtx,
22772 GEN_INT (- 4 * num_regs)));
22773 }
22774
22775 arm_emit_multi_reg_pop (saved_regs_mask);
22776
22777 if (IS_INTERRUPT (func_type))
22778 {
22779 /* Interrupt handlers will have pushed the
22780 IP onto the stack, so restore it now. */
22781 rtx insn;
22782 rtx addr = gen_rtx_MEM (SImode,
22783 gen_rtx_POST_INC (SImode,
22784 stack_pointer_rtx));
22785 set_mem_alias_set (addr, get_frame_alias_set ());
22786 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
22787 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22788 gen_rtx_REG (SImode, IP_REGNUM),
22789 NULL_RTX);
22790 }
22791
22792 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
22793 return;
22794
22795 if (crtl->calls_eh_return)
22796 emit_insn (gen_addsi3 (stack_pointer_rtx,
22797 stack_pointer_rtx,
22798 GEN_INT (ARM_EH_STACKADJ_REGNUM)));
22799
22800 if (IS_STACKALIGN (func_type))
22801 /* Restore the original stack pointer. Before prologue, the stack was
22802 realigned and the original stack pointer saved in r0. For details,
22803 see comment in arm_expand_prologue. */
22804 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
22805
22806 emit_jump_insn (simple_return_rtx);
22807 }
22808
22809 /* Generate RTL to represent ARM epilogue. Really_return is true if the
22810 function is not a sibcall. */
22811 void
22812 arm_expand_epilogue (bool really_return)
22813 {
22814 unsigned long func_type;
22815 unsigned long saved_regs_mask;
22816 int num_regs = 0;
22817 int i;
22818 int amount;
22819 arm_stack_offsets *offsets;
22820
22821 func_type = arm_current_func_type ();
22822
22823 /* Naked functions don't have epilogues. Hence, generate a return pattern and
22824 let output_return_instruction take care of any instruction emission. */
22825 if (IS_NAKED (func_type)
22826 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
22827 {
22828 emit_jump_insn (simple_return_rtx);
22829 return;
22830 }
22831
22832 /* If we are throwing an exception, then we really must be doing a
22833 return, so we can't tail-call. */
22834 gcc_assert (!crtl->calls_eh_return || really_return);
22835
22836 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
22837 {
22838 arm_expand_epilogue_apcs_frame (really_return);
22839 return;
22840 }
22841
22842 /* Get frame offsets for ARM. */
22843 offsets = arm_get_frame_offsets ();
22844 saved_regs_mask = offsets->saved_regs_mask;
22845 num_regs = bit_count (saved_regs_mask);
22846
22847 if (frame_pointer_needed)
22848 {
22849 /* Restore stack pointer if necessary. */
22850 if (TARGET_ARM)
22851 {
22852 /* In ARM mode, the frame pointer points to the first saved register.
22853 Restore the stack pointer to the last saved register. */
22854 amount = offsets->frame - offsets->saved_regs;
22855
22856 /* Force out any pending memory operations that reference stacked data
22857 before stack de-allocation occurs. */
22858 emit_insn (gen_blockage ());
22859 emit_insn (gen_addsi3 (stack_pointer_rtx,
22860 hard_frame_pointer_rtx,
22861 GEN_INT (amount)));
22862
22863 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
22864 deleted. */
22865 emit_insn (gen_prologue_use (stack_pointer_rtx));
22866 }
22867 else
22868 {
22869 /* In Thumb-2 mode, the frame pointer points to the last saved
22870 register. */
22871 amount = offsets->locals_base - offsets->saved_regs;
22872 if (amount)
22873 emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22874 hard_frame_pointer_rtx,
22875 GEN_INT (amount)));
22876
22877 /* Force out any pending memory operations that reference stacked data
22878 before stack de-allocation occurs. */
22879 emit_insn (gen_blockage ());
22880 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
22881 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
22882 deleted. */
22883 emit_insn (gen_prologue_use (stack_pointer_rtx));
22884 }
22885 }
22886 else
22887 {
22888 /* Pop off outgoing args and local frame to adjust stack pointer to
22889 last saved register. */
22890 amount = offsets->outgoing_args - offsets->saved_regs;
22891 if (amount)
22892 {
22893 /* Force out any pending memory operations that reference stacked data
22894 before stack de-allocation occurs. */
22895 emit_insn (gen_blockage ());
22896 emit_insn (gen_addsi3 (stack_pointer_rtx,
22897 stack_pointer_rtx,
22898 GEN_INT (amount)));
22899 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
22900 not deleted. */
22901 emit_insn (gen_prologue_use (stack_pointer_rtx));
22902 }
22903 }
22904
22905 if (TARGET_HARD_FLOAT && TARGET_VFP)
22906 {
22907 /* Generate VFP register multi-pop. */
22908 int end_reg = LAST_VFP_REGNUM + 1;
22909
22910 /* Scan the registers in reverse order. We need to match
22911 any groupings made in the prologue and generate matching
22912 vldm operations. The need to match groups is because,
22913 unlike pop, vldm can only do consecutive regs. */
22914 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
22915 /* Look for a case where a reg does not need restoring. */
22916 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
22917 && (!df_regs_ever_live_p (i + 1)
22918 || call_used_regs[i + 1]))
22919 {
22920 /* Restore the regs discovered so far (from reg+2 to
22921 end_reg). */
22922 if (end_reg > i + 2)
22923 arm_emit_vfp_multi_reg_pop (i + 2,
22924 (end_reg - (i + 2)) / 2,
22925 stack_pointer_rtx);
22926 end_reg = i;
22927 }
22928
22929 /* Restore the remaining regs that we have discovered (or possibly
22930 even all of them, if the conditional in the for loop never
22931 fired). */
22932 if (end_reg > i + 2)
22933 arm_emit_vfp_multi_reg_pop (i + 2,
22934 (end_reg - (i + 2)) / 2,
22935 stack_pointer_rtx);
22936 }
22937
22938 if (TARGET_IWMMXT)
22939 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
22940 if (df_regs_ever_live_p (i) && !call_used_regs[i])
22941 {
22942 rtx insn;
22943 rtx addr = gen_rtx_MEM (V2SImode,
22944 gen_rtx_POST_INC (SImode,
22945 stack_pointer_rtx));
22946 set_mem_alias_set (addr, get_frame_alias_set ());
22947 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
22948 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22949 gen_rtx_REG (V2SImode, i),
22950 NULL_RTX);
22951 }
22952
22953 if (saved_regs_mask)
22954 {
22955 rtx insn;
22956 bool return_in_pc = false;
22957
22958 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
22959 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
22960 && !IS_STACKALIGN (func_type)
22961 && really_return
22962 && crtl->args.pretend_args_size == 0
22963 && saved_regs_mask & (1 << LR_REGNUM)
22964 && !crtl->calls_eh_return)
22965 {
22966 saved_regs_mask &= ~(1 << LR_REGNUM);
22967 saved_regs_mask |= (1 << PC_REGNUM);
22968 return_in_pc = true;
22969 }
22970
22971 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
22972 {
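	  /* Restore the single saved register with a post-increment load
	     from the stack; if that register is the PC, fold the load and
	     the return into one jump insn.  */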
22973 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22974 if (saved_regs_mask & (1 << i))
22975 {
22976 rtx addr = gen_rtx_MEM (SImode,
22977 gen_rtx_POST_INC (SImode,
22978 stack_pointer_rtx));
22979 set_mem_alias_set (addr, get_frame_alias_set ());
22980
22981 if (i == PC_REGNUM)
22982 {
22983 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22984 XVECEXP (insn, 0, 0) = ret_rtx;
22985 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
22986 gen_rtx_REG (SImode, i),
22987 addr);
22988 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
22989 insn = emit_jump_insn (insn);
22990 }
22991 else
22992 {
22993 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
22994 addr));
22995 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22996 gen_rtx_REG (SImode, i),
22997 NULL_RTX);
22998 }
22999 }
23000 }
23001 else
23002 {
23003 arm_emit_multi_reg_pop (saved_regs_mask);
23004 }
23005
23006 if (return_in_pc)
23007 return;
23008 }
23009
23010 if (crtl->args.pretend_args_size)
23011 emit_insn (gen_addsi3 (stack_pointer_rtx,
23012 stack_pointer_rtx,
23013 GEN_INT (crtl->args.pretend_args_size)));
23014
23015 if (!really_return)
23016 return;
23017
23018 if (crtl->calls_eh_return)
23019 emit_insn (gen_addsi3 (stack_pointer_rtx,
23020 stack_pointer_rtx,
23021 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
23022
23023 if (IS_STACKALIGN (func_type))
23024 /* Restore the original stack pointer. Before prologue, the stack was
23025 realigned and the original stack pointer saved in r0. For details,
23026 see comment in arm_expand_prologue. */
23027 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
23028
23029 emit_jump_insn (simple_return_rtx);
23030 }
23031
23032 /* Implementation of insn prologue_thumb1_interwork. This is the first
23033 "instruction" of a function called in ARM mode. Swap to thumb mode. */
23034
23035 const char *
23036 thumb1_output_interwork (void)
23037 {
23038 const char * name;
23039 FILE *f = asm_out_file;
23040
23041 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
23042 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
23043 == SYMBOL_REF);
23044 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
23045
23046 /* Generate code sequence to switch us into Thumb mode. */
23047 /* The .code 32 directive has already been emitted by
23048 ASM_DECLARE_FUNCTION_NAME. */
23049 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
23050 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
23051
23052 /* Generate a label, so that the debugger will notice the
23053 change in instruction sets. This label is also used by
23054 the assembler to bypass the ARM code when this function
23055 is called from a Thumb encoded function elsewhere in the
23056 same file. Hence the definition of STUB_NAME here must
23057 agree with the definition in gas/config/tc-arm.c. */
23058
23059 #define STUB_NAME ".real_start_of"
23060
23061 fprintf (f, "\t.code\t16\n");
23062 #ifdef ARM_PE
23063 if (arm_dllexport_name_p (name))
23064 name = arm_strip_name_encoding (name);
23065 #endif
23066 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
23067 fprintf (f, "\t.thumb_func\n");
23068 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
23069
23070 return "";
23071 }
23072
23073 /* Handle the case of a double word load into a low register from
23074 a computed memory address. The computed address may involve a
23075 register which is overwritten by the load. */
23076 const char *
23077 thumb_load_double_from_address (rtx *operands)
23078 {
23079 rtx addr;
23080 rtx base;
23081 rtx offset;
23082 rtx arg1;
23083 rtx arg2;
23084
23085 gcc_assert (REG_P (operands[0]));
23086 gcc_assert (MEM_P (operands[1]));
23087
23088 /* Get the memory address. */
23089 addr = XEXP (operands[1], 0);
23090
23091 /* Work out how the memory address is computed. */
23092 switch (GET_CODE (addr))
23093 {
23094 case REG:
23095 operands[2] = adjust_address (operands[1], SImode, 4);
23096
23097 if (REGNO (operands[0]) == REGNO (addr))
23098 {
23099 output_asm_insn ("ldr\t%H0, %2", operands);
23100 output_asm_insn ("ldr\t%0, %1", operands);
23101 }
23102 else
23103 {
23104 output_asm_insn ("ldr\t%0, %1", operands);
23105 output_asm_insn ("ldr\t%H0, %2", operands);
23106 }
23107 break;
23108
23109 case CONST:
23110 /* Compute <address> + 4 for the high order load. */
23111 operands[2] = adjust_address (operands[1], SImode, 4);
23112
23113 output_asm_insn ("ldr\t%0, %1", operands);
23114 output_asm_insn ("ldr\t%H0, %2", operands);
23115 break;
23116
23117 case PLUS:
23118 arg1 = XEXP (addr, 0);
23119 arg2 = XEXP (addr, 1);
23120
23121 if (CONSTANT_P (arg1))
23122 base = arg2, offset = arg1;
23123 else
23124 base = arg1, offset = arg2;
23125
23126 gcc_assert (REG_P (base));
23127
23128 /* Catch the case of <address> = <reg> + <reg> */
23129 if (REG_P (offset))
23130 {
23131 int reg_offset = REGNO (offset);
23132 int reg_base = REGNO (base);
23133 int reg_dest = REGNO (operands[0]);
23134
23135 /* Add the base and offset registers together into the
23136 higher destination register. */
23137 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
23138 reg_dest + 1, reg_base, reg_offset);
23139
23140 /* Load the lower destination register from the address in
23141 the higher destination register. */
23142 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
23143 reg_dest, reg_dest + 1);
23144
23145 /* Load the higher destination register from its own address
23146 plus 4. */
23147 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
23148 reg_dest + 1, reg_dest + 1);
23149 }
23150 else
23151 {
23152 /* Compute <address> + 4 for the high order load. */
23153 operands[2] = adjust_address (operands[1], SImode, 4);
23154
23155 /* If the computed address is held in the low order register
23156 then load the high order register first, otherwise always
23157 load the low order register first. */
23158 if (REGNO (operands[0]) == REGNO (base))
23159 {
23160 output_asm_insn ("ldr\t%H0, %2", operands);
23161 output_asm_insn ("ldr\t%0, %1", operands);
23162 }
23163 else
23164 {
23165 output_asm_insn ("ldr\t%0, %1", operands);
23166 output_asm_insn ("ldr\t%H0, %2", operands);
23167 }
23168 }
23169 break;
23170
23171 case LABEL_REF:
23172 /* With no registers to worry about we can just load the value
23173 directly. */
23174 operands[2] = adjust_address (operands[1], SImode, 4);
23175
23176 output_asm_insn ("ldr\t%H0, %2", operands);
23177 output_asm_insn ("ldr\t%0, %1", operands);
23178 break;
23179
23180 default:
23181 gcc_unreachable ();
23182 }
23183
23184 return "";
23185 }
23186
23187 const char *
23188 thumb_output_move_mem_multiple (int n, rtx *operands)
23189 {
23190 rtx tmp;
23191
23192 switch (n)
23193 {
23194 case 2:
23195 if (REGNO (operands[4]) > REGNO (operands[5]))
23196 {
23197 tmp = operands[4];
23198 operands[4] = operands[5];
23199 operands[5] = tmp;
23200 }
23201 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
23202 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
23203 break;
23204
23205 case 3:
23206 if (REGNO (operands[4]) > REGNO (operands[5]))
23207 {
23208 tmp = operands[4];
23209 operands[4] = operands[5];
23210 operands[5] = tmp;
23211 }
23212 if (REGNO (operands[5]) > REGNO (operands[6]))
23213 {
23214 tmp = operands[5];
23215 operands[5] = operands[6];
23216 operands[6] = tmp;
23217 }
23218 if (REGNO (operands[4]) > REGNO (operands[5]))
23219 {
23220 tmp = operands[4];
23221 operands[4] = operands[5];
23222 operands[5] = tmp;
23223 }
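      /* The three conditional swaps above form a sorting network that puts
	 operands[4..6] into ascending register order, as LDMIA/STMIA
	 register lists require.  */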
23224
23225 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
23226 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
23227 break;
23228
23229 default:
23230 gcc_unreachable ();
23231 }
23232
23233 return "";
23234 }
23235
23236 /* Output a call-via instruction for thumb state. */
23237 const char *
23238 thumb_call_via_reg (rtx reg)
23239 {
23240 int regno = REGNO (reg);
23241 rtx *labelp;
23242
23243 gcc_assert (regno < LR_REGNUM);
23244
23245 /* If we are in the normal text section we can use a single instance
23246 per compilation unit. If we are doing function sections, then we need
23247 an entry per section, since we can't rely on reachability. */
23248 if (in_section == text_section)
23249 {
23250 thumb_call_reg_needed = 1;
23251
23252 if (thumb_call_via_label[regno] == NULL)
23253 thumb_call_via_label[regno] = gen_label_rtx ();
23254 labelp = thumb_call_via_label + regno;
23255 }
23256 else
23257 {
23258 if (cfun->machine->call_via[regno] == NULL)
23259 cfun->machine->call_via[regno] = gen_label_rtx ();
23260 labelp = cfun->machine->call_via + regno;
23261 }
23262
23263 output_asm_insn ("bl\t%a0", labelp);
23264 return "";
23265 }
23266
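/* Illustrative note (not part of the original source): a Thumb call through,
   say, r3 becomes "bl .L<n>", where .L<n> is the label recorded above and is
   later given a body of the form

       .L<n>:
               bx      r3

   by arm_file_end (or per section, when function sections are in use).  */
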
23267 /* Routines for generating rtl. */
23268 void
23269 thumb_expand_movmemqi (rtx *operands)
23270 {
23271 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
23272 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
23273 HOST_WIDE_INT len = INTVAL (operands[2]);
23274 HOST_WIDE_INT offset = 0;
23275
23276 while (len >= 12)
23277 {
23278 emit_insn (gen_movmem12b (out, in, out, in));
23279 len -= 12;
23280 }
23281
23282 if (len >= 8)
23283 {
23284 emit_insn (gen_movmem8b (out, in, out, in));
23285 len -= 8;
23286 }
23287
23288 if (len >= 4)
23289 {
23290 rtx reg = gen_reg_rtx (SImode);
23291 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
23292 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
23293 len -= 4;
23294 offset += 4;
23295 }
23296
23297 if (len >= 2)
23298 {
23299 rtx reg = gen_reg_rtx (HImode);
23300 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
23301 plus_constant (Pmode, in,
23302 offset))));
23303 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
23304 offset)),
23305 reg));
23306 len -= 2;
23307 offset += 2;
23308 }
23309
23310 if (len)
23311 {
23312 rtx reg = gen_reg_rtx (QImode);
23313 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
23314 plus_constant (Pmode, in,
23315 offset))));
23316 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
23317 offset)),
23318 reg));
23319 }
23320 }
23321
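/* Illustrative note (not part of the original source): the length is peeled
   off greedily, so e.g. a 23-byte copy expands to one 12-byte and one 8-byte
   block move (movmem12b/movmem8b) followed by a halfword copy and a final
   byte copy for the remaining 3 bytes.  */
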
23322 void
23323 thumb_reload_out_hi (rtx *operands)
23324 {
23325 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
23326 }
23327
23328 /* Handle reading a half-word from memory during reload. */
23329 void
23330 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
23331 {
23332 gcc_unreachable ();
23333 }
23334
23335 /* Return the length of a function name prefix
23336 that starts with the character 'c'. */
23337 static int
23338 arm_get_strip_length (int c)
23339 {
23340 switch (c)
23341 {
23342 ARM_NAME_ENCODING_LENGTHS
23343 default: return 0;
23344 }
23345 }
23346
23347 /* Return a pointer to a function's name with any
23348 and all prefix encodings stripped from it. */
23349 const char *
23350 arm_strip_name_encoding (const char *name)
23351 {
23352 int skip;
23353
23354 while ((skip = arm_get_strip_length (* name)))
23355 name += skip;
23356
23357 return name;
23358 }
23359
23360 /* If there is a '*' anywhere in the name's prefix, then
23361 emit the stripped name verbatim, otherwise prepend an
23362 underscore if leading underscores are being used. */
23363 void
23364 arm_asm_output_labelref (FILE *stream, const char *name)
23365 {
23366 int skip;
23367 int verbatim = 0;
23368
23369 while ((skip = arm_get_strip_length (* name)))
23370 {
23371 verbatim |= (*name == '*');
23372 name += skip;
23373 }
23374
23375 if (verbatim)
23376 fputs (name, stream);
23377 else
23378 asm_fprintf (stream, "%U%s", name);
23379 }
23380
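/* Illustrative example (not part of the original source), assuming the
   target's ARM_NAME_ENCODING_LENGTHS treats '*' as a one-character prefix:
   a name encoded as "*foo" is emitted verbatim as "foo", whereas a plain
   "bar" gets the user-label prefix, e.g. "_bar" on targets that use a
   leading underscore.  */
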
23381 /* This function is used to emit an EABI tag and its associated value.
23382 We emit the numerical value of the tag in case the assembler does not
23383 support textual tags. (E.g. gas prior to 2.20.) If requested we include
23384 the tag name in a comment so that anyone reading the assembler output
23385 will know which tag is being set.
23386
23387 This function is not static because arm-c.c needs it too. */
23388
23389 void
23390 arm_emit_eabi_attribute (const char *name, int num, int val)
23391 {
23392 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
23393 if (flag_verbose_asm || flag_debug_asm)
23394 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
23395 asm_fprintf (asm_out_file, "\n");
23396 }
23397
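/* Illustrative example (not part of the original source): a call such as

       arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 1);

   writes ".eabi_attribute 26, 1" to the assembly output, followed by
   "@ Tag_ABI_enum_size" when -fverbose-asm is in effect (assuming '@' is
   the target's ASM_COMMENT_START).  */
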
23398 static void
23399 arm_file_start (void)
23400 {
23401 int val;
23402
23403 if (TARGET_UNIFIED_ASM)
23404 asm_fprintf (asm_out_file, "\t.syntax unified\n");
23405
23406 if (TARGET_BPABI)
23407 {
23408 const char *fpu_name;
23409 if (arm_selected_arch)
23410 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
23411 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
23412 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
23413 else
23414 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
23415
23416 if (TARGET_SOFT_FLOAT)
23417 {
23418 fpu_name = "softvfp";
23419 }
23420 else
23421 {
23422 fpu_name = arm_fpu_desc->name;
23423 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
23424 {
23425 if (TARGET_HARD_FLOAT)
23426 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
23427 if (TARGET_HARD_FLOAT_ABI)
23428 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
23429 }
23430 }
23431 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
23432
23433 /* Some of these attributes only apply when the corresponding features
23434 are used. However, we don't have any easy way of figuring this out.
23435 Conservatively record the setting that would have been used. */
23436
23437 if (flag_rounding_math)
23438 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
23439
23440 if (!flag_unsafe_math_optimizations)
23441 {
23442 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
23443 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
23444 }
23445 if (flag_signaling_nans)
23446 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
23447
23448 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
23449 flag_finite_math_only ? 1 : 3);
23450
23451 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
23452 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
23453 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
23454 flag_short_enums ? 1 : 2);
23455
23456 /* Tag_ABI_optimization_goals. */
23457 if (optimize_size)
23458 val = 4;
23459 else if (optimize >= 2)
23460 val = 2;
23461 else if (optimize)
23462 val = 1;
23463 else
23464 val = 6;
23465 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
23466
23467 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
23468 unaligned_access);
23469
23470 if (arm_fp16_format)
23471 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
23472 (int) arm_fp16_format);
23473
23474 if (arm_lang_output_object_attributes_hook)
23475 arm_lang_output_object_attributes_hook();
23476 }
23477
23478 default_file_start ();
23479 }
23480
23481 static void
23482 arm_file_end (void)
23483 {
23484 int regno;
23485
23486 if (NEED_INDICATE_EXEC_STACK)
23487 /* Add .note.GNU-stack. */
23488 file_end_indicate_exec_stack ();
23489
23490 if (! thumb_call_reg_needed)
23491 return;
23492
23493 switch_to_section (text_section);
23494 asm_fprintf (asm_out_file, "\t.code 16\n");
23495 ASM_OUTPUT_ALIGN (asm_out_file, 1);
23496
23497 for (regno = 0; regno < LR_REGNUM; regno++)
23498 {
23499 rtx label = thumb_call_via_label[regno];
23500
23501 if (label != 0)
23502 {
23503 targetm.asm_out.internal_label (asm_out_file, "L",
23504 CODE_LABEL_NUMBER (label));
23505 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
23506 }
23507 }
23508 }
23509
23510 #ifndef ARM_PE
23511 /* Symbols in the text segment can be accessed without indirecting via the
23512 constant pool; it may take an extra binary operation, but this is still
23513 faster than indirecting via memory. Don't do this when not optimizing,
23514 since we won't be calculating all of the offsets necessary to do this
23515 simplification. */
23516
23517 static void
23518 arm_encode_section_info (tree decl, rtx rtl, int first)
23519 {
23520 if (optimize > 0 && TREE_CONSTANT (decl))
23521 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
23522
23523 default_encode_section_info (decl, rtl, first);
23524 }
23525 #endif /* !ARM_PE */
23526
23527 static void
23528 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
23529 {
23530 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
23531 && !strcmp (prefix, "L"))
23532 {
23533 arm_ccfsm_state = 0;
23534 arm_target_insn = NULL;
23535 }
23536 default_internal_label (stream, prefix, labelno);
23537 }
23538
23539 /* Output code to add DELTA to the first argument, and then jump
23540 to FUNCTION. Used for C++ multiple inheritance. */
23541 static void
23542 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
23543 HOST_WIDE_INT delta,
23544 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
23545 tree function)
23546 {
23547 static int thunk_label = 0;
23548 char label[256];
23549 char labelpc[256];
23550 int mi_delta = delta;
23551 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
23552 int shift = 0;
23553 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
23554 ? 1 : 0);
23555 if (mi_delta < 0)
23556 mi_delta = - mi_delta;
23557
23558 if (TARGET_THUMB1)
23559 {
23560 int labelno = thunk_label++;
23561 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
23562 /* Thunks are entered in ARM mode when available. */
23563 if (TARGET_THUMB1_ONLY)
23564 {
23565 /* push r3 so we can use it as a temporary. */
23566 /* TODO: Omit this save if r3 is not used. */
23567 fputs ("\tpush {r3}\n", file);
23568 fputs ("\tldr\tr3, ", file);
23569 }
23570 else
23571 {
23572 fputs ("\tldr\tr12, ", file);
23573 }
23574 assemble_name (file, label);
23575 fputc ('\n', file);
23576 if (flag_pic)
23577 {
23578 /* If we are generating PIC, the ldr instruction below loads
23579 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
23580 the address of the add + 8, so we have:
23581
23582 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
23583 = target + 1.
23584
23585 Note that we have "+ 1" because some versions of GNU ld
23586 don't set the low bit of the result for R_ARM_REL32
23587 relocations against thumb function symbols.
23588 On ARMv6M this is +4, not +8. */
23589 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
23590 assemble_name (file, labelpc);
23591 fputs (":\n", file);
23592 if (TARGET_THUMB1_ONLY)
23593 {
23594 /* This is 2 insns after the start of the thunk, so we know it
23595 is 4-byte aligned. */
23596 fputs ("\tadd\tr3, pc, r3\n", file);
23597 fputs ("\tmov r12, r3\n", file);
23598 }
23599 else
23600 fputs ("\tadd\tr12, pc, r12\n", file);
23601 }
23602 else if (TARGET_THUMB1_ONLY)
23603 fputs ("\tmov r12, r3\n", file);
23604 }
23605 if (TARGET_THUMB1_ONLY)
23606 {
23607 if (mi_delta > 255)
23608 {
23609 fputs ("\tldr\tr3, ", file);
23610 assemble_name (file, label);
23611 fputs ("+4\n", file);
23612 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
23613 mi_op, this_regno, this_regno);
23614 }
23615 else if (mi_delta != 0)
23616 {
23617 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23618 mi_op, this_regno, this_regno,
23619 mi_delta);
23620 }
23621 }
23622 else
23623 {
23624 /* TODO: Use movw/movt for large constants when available. */
23625 while (mi_delta != 0)
23626 {
23627 if ((mi_delta & (3 << shift)) == 0)
23628 shift += 2;
23629 else
23630 {
23631 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23632 mi_op, this_regno, this_regno,
23633 mi_delta & (0xff << shift));
23634 mi_delta &= ~(0xff << shift);
23635 shift += 8;
23636 }
23637 }
23638 }
23639 if (TARGET_THUMB1)
23640 {
23641 if (TARGET_THUMB1_ONLY)
23642 fputs ("\tpop\t{r3}\n", file);
23643
23644 fprintf (file, "\tbx\tr12\n");
23645 ASM_OUTPUT_ALIGN (file, 2);
23646 assemble_name (file, label);
23647 fputs (":\n", file);
23648 if (flag_pic)
23649 {
23650 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
23651 rtx tem = XEXP (DECL_RTL (function), 0);
23652 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
23653 tem = gen_rtx_MINUS (GET_MODE (tem),
23654 tem,
23655 gen_rtx_SYMBOL_REF (Pmode,
23656 ggc_strdup (labelpc)));
23657 assemble_integer (tem, 4, BITS_PER_WORD, 1);
23658 }
23659 else
23660 /* Output ".word .LTHUNKn". */
23661 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
23662
23663 if (TARGET_THUMB1_ONLY && mi_delta > 255)
23664 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
23665 }
23666 else
23667 {
23668 fputs ("\tb\t", file);
23669 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
23670 if (NEED_PLT_RELOC)
23671 fputs ("(PLT)", file);
23672 fputc ('\n', file);
23673 }
23674 }
23675
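/* Illustrative example (not part of the original source): on a non-Thumb-1
   target with delta == 4 and the this pointer in r0, the thunk body emitted
   above reduces to roughly

       add     r0, r0, #4
       b       <function>(PLT)

   where the "(PLT)" suffix only appears when NEED_PLT_RELOC holds.  */
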
23676 int
23677 arm_emit_vector_const (FILE *file, rtx x)
23678 {
23679 int i;
23680 const char * pattern;
23681
23682 gcc_assert (GET_CODE (x) == CONST_VECTOR);
23683
23684 switch (GET_MODE (x))
23685 {
23686 case V2SImode: pattern = "%08x"; break;
23687 case V4HImode: pattern = "%04x"; break;
23688 case V8QImode: pattern = "%02x"; break;
23689 default: gcc_unreachable ();
23690 }
23691
23692 fprintf (file, "0x");
23693 for (i = CONST_VECTOR_NUNITS (x); i--;)
23694 {
23695 rtx element;
23696
23697 element = CONST_VECTOR_ELT (x, i);
23698 fprintf (file, pattern, INTVAL (element));
23699 }
23700
23701 return 1;
23702 }
23703
23704 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
23705 HFmode constant pool entries are actually loaded with ldr. */
23706 void
23707 arm_emit_fp16_const (rtx c)
23708 {
23709 REAL_VALUE_TYPE r;
23710 long bits;
23711
23712 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
23713 bits = real_to_target (NULL, &r, HFmode);
23714 if (WORDS_BIG_ENDIAN)
23715 assemble_zeros (2);
23716 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
23717 if (!WORDS_BIG_ENDIAN)
23718 assemble_zeros (2);
23719 }
23720
23721 const char *
23722 arm_output_load_gr (rtx *operands)
23723 {
23724 rtx reg;
23725 rtx offset;
23726 rtx wcgr;
23727 rtx sum;
23728
23729 if (!MEM_P (operands [1])
23730 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
23731 || !REG_P (reg = XEXP (sum, 0))
23732 || !CONST_INT_P (offset = XEXP (sum, 1))
23733 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
23734 return "wldrw%?\t%0, %1";
23735
23736 /* Fix up an out-of-range load of a GR register. */
23737 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
23738 wcgr = operands[0];
23739 operands[0] = reg;
23740 output_asm_insn ("ldr%?\t%0, %1", operands);
23741
23742 operands[0] = wcgr;
23743 operands[1] = reg;
23744 output_asm_insn ("tmcr%?\t%0, %1", operands);
23745 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
23746
23747 return "";
23748 }
23749
23750 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
23751
23752 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
23753 named arg and all anonymous args onto the stack.
23754 XXX I know the prologue shouldn't be pushing registers, but it is faster
23755 that way. */
23756
23757 static void
23758 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
23759 enum machine_mode mode,
23760 tree type,
23761 int *pretend_size,
23762 int second_time ATTRIBUTE_UNUSED)
23763 {
23764 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
23765 int nregs;
23766
23767 cfun->machine->uses_anonymous_args = 1;
23768 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
23769 {
23770 nregs = pcum->aapcs_ncrn;
23771 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
23772 nregs++;
23773 }
23774 else
23775 nregs = pcum->nregs;
23776
23777 if (nregs < NUM_ARG_REGS)
23778 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
23779 }
23780
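/* Worked example (not part of the original source), assuming the usual ARM
   values NUM_ARG_REGS == 4 and UNITS_PER_WORD == 4: for

       int f (int a, ...);

   only r0 is consumed by the named argument, so nregs == 1 and
   *pretend_size becomes (4 - 1) * 4 == 12, i.e. the prologue pushes r1-r3
   so that the anonymous arguments end up contiguous on the stack.  */
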
23781 /* Return nonzero if the CONSUMER instruction (a store) does not need
23782 PRODUCER's value to calculate the address. */
23783
23784 int
23785 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
23786 {
23787 rtx value = PATTERN (producer);
23788 rtx addr = PATTERN (consumer);
23789
23790 if (GET_CODE (value) == COND_EXEC)
23791 value = COND_EXEC_CODE (value);
23792 if (GET_CODE (value) == PARALLEL)
23793 value = XVECEXP (value, 0, 0);
23794 value = XEXP (value, 0);
23795 if (GET_CODE (addr) == COND_EXEC)
23796 addr = COND_EXEC_CODE (addr);
23797 if (GET_CODE (addr) == PARALLEL)
23798 addr = XVECEXP (addr, 0, 0);
23799 addr = XEXP (addr, 0);
23800
23801 return !reg_overlap_mentioned_p (value, addr);
23802 }
23803
23804 /* Return nonzero if the CONSUMER instruction (a store) does need
23805 PRODUCER's value to calculate the address. */
23806
23807 int
23808 arm_early_store_addr_dep (rtx producer, rtx consumer)
23809 {
23810 return !arm_no_early_store_addr_dep (producer, consumer);
23811 }
23812
23813 /* Return nonzero if the CONSUMER instruction (a load) does need
23814 PRODUCER's value to calculate the address. */
23815
23816 int
23817 arm_early_load_addr_dep (rtx producer, rtx consumer)
23818 {
23819 rtx value = PATTERN (producer);
23820 rtx addr = PATTERN (consumer);
23821
23822 if (GET_CODE (value) == COND_EXEC)
23823 value = COND_EXEC_CODE (value);
23824 if (GET_CODE (value) == PARALLEL)
23825 value = XVECEXP (value, 0, 0);
23826 value = XEXP (value, 0);
23827 if (GET_CODE (addr) == COND_EXEC)
23828 addr = COND_EXEC_CODE (addr);
23829 if (GET_CODE (addr) == PARALLEL)
23830 {
23831 if (GET_CODE (XVECEXP (addr, 0, 0)) == RETURN)
23832 addr = XVECEXP (addr, 0, 1);
23833 else
23834 addr = XVECEXP (addr, 0, 0);
23835 }
23836 addr = XEXP (addr, 1);
23837
23838 return reg_overlap_mentioned_p (value, addr);
23839 }
23840
23841 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23842 have an early register shift value or amount dependency on the
23843 result of PRODUCER. */
23844
23845 int
23846 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
23847 {
23848 rtx value = PATTERN (producer);
23849 rtx op = PATTERN (consumer);
23850 rtx early_op;
23851
23852 if (GET_CODE (value) == COND_EXEC)
23853 value = COND_EXEC_CODE (value);
23854 if (GET_CODE (value) == PARALLEL)
23855 value = XVECEXP (value, 0, 0);
23856 value = XEXP (value, 0);
23857 if (GET_CODE (op) == COND_EXEC)
23858 op = COND_EXEC_CODE (op);
23859 if (GET_CODE (op) == PARALLEL)
23860 op = XVECEXP (op, 0, 0);
23861 op = XEXP (op, 1);
23862
23863 early_op = XEXP (op, 0);
23864 /* This is either an actual independent shift, or a shift applied to
23865 the first operand of another operation. We want the whole shift
23866 operation. */
23867 if (REG_P (early_op))
23868 early_op = op;
23869
23870 return !reg_overlap_mentioned_p (value, early_op);
23871 }
23872
23873 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23874 have an early register shift value dependency on the result of
23875 PRODUCER. */
23876
23877 int
23878 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
23879 {
23880 rtx value = PATTERN (producer);
23881 rtx op = PATTERN (consumer);
23882 rtx early_op;
23883
23884 if (GET_CODE (value) == COND_EXEC)
23885 value = COND_EXEC_CODE (value);
23886 if (GET_CODE (value) == PARALLEL)
23887 value = XVECEXP (value, 0, 0);
23888 value = XEXP (value, 0);
23889 if (GET_CODE (op) == COND_EXEC)
23890 op = COND_EXEC_CODE (op);
23891 if (GET_CODE (op) == PARALLEL)
23892 op = XVECEXP (op, 0, 0);
23893 op = XEXP (op, 1);
23894
23895 early_op = XEXP (op, 0);
23896
23897 /* This is either an actual independent shift, or a shift applied to
23898 the first operand of another operation. We want the value being
23899 shifted, in either case. */
23900 if (!REG_P (early_op))
23901 early_op = XEXP (early_op, 0);
23902
23903 return !reg_overlap_mentioned_p (value, early_op);
23904 }
23905
23906 /* Return nonzero if the CONSUMER (a mul or mac op) does not
23907 have an early register mult dependency on the result of
23908 PRODUCER. */
23909
23910 int
23911 arm_no_early_mul_dep (rtx producer, rtx consumer)
23912 {
23913 rtx value = PATTERN (producer);
23914 rtx op = PATTERN (consumer);
23915
23916 if (GET_CODE (value) == COND_EXEC)
23917 value = COND_EXEC_CODE (value);
23918 if (GET_CODE (value) == PARALLEL)
23919 value = XVECEXP (value, 0, 0);
23920 value = XEXP (value, 0);
23921 if (GET_CODE (op) == COND_EXEC)
23922 op = COND_EXEC_CODE (op);
23923 if (GET_CODE (op) == PARALLEL)
23924 op = XVECEXP (op, 0, 0);
23925 op = XEXP (op, 1);
23926
23927 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
23928 {
23929 if (GET_CODE (XEXP (op, 0)) == MULT)
23930 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
23931 else
23932 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
23933 }
23934
23935 return 0;
23936 }
23937
23938 /* We can't rely on the caller doing the proper promotion when
23939 using APCS or ATPCS. */
23940
23941 static bool
23942 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
23943 {
23944 return !TARGET_AAPCS_BASED;
23945 }
23946
23947 static enum machine_mode
23948 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
23949 enum machine_mode mode,
23950 int *punsignedp ATTRIBUTE_UNUSED,
23951 const_tree fntype ATTRIBUTE_UNUSED,
23952 int for_return ATTRIBUTE_UNUSED)
23953 {
23954 if (GET_MODE_CLASS (mode) == MODE_INT
23955 && GET_MODE_SIZE (mode) < 4)
23956 return SImode;
23957
23958 return mode;
23959 }
23960
23961 /* AAPCS based ABIs use short enums by default. */
23962
23963 static bool
23964 arm_default_short_enums (void)
23965 {
23966 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
23967 }
23968
23969
23970 /* AAPCS requires that anonymous bitfields affect structure alignment. */
23971
23972 static bool
23973 arm_align_anon_bitfield (void)
23974 {
23975 return TARGET_AAPCS_BASED;
23976 }
23977
23978
23979 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
23980
23981 static tree
23982 arm_cxx_guard_type (void)
23983 {
23984 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
23985 }
23986
23987 /* Return non-zero if the consumer (a multiply-accumulate instruction)
23988 has an accumulator dependency on the result of the producer (a
23989 multiplication instruction) and no other dependency on that result. */
23990 int
23991 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
23992 {
23993 rtx mul = PATTERN (producer);
23994 rtx mac = PATTERN (consumer);
23995 rtx mul_result;
23996 rtx mac_op0, mac_op1, mac_acc;
23997
23998 if (GET_CODE (mul) == COND_EXEC)
23999 mul = COND_EXEC_CODE (mul);
24000 if (GET_CODE (mac) == COND_EXEC)
24001 mac = COND_EXEC_CODE (mac);
24002
24003 /* Check that mul is of the form (set (...) (mult ...))
24004 and mla is of the form (set (...) (plus (mult ...) (...))). */
24005 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
24006 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
24007 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
24008 return 0;
24009
24010 mul_result = XEXP (mul, 0);
24011 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
24012 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
24013 mac_acc = XEXP (XEXP (mac, 1), 1);
24014
24015 return (reg_overlap_mentioned_p (mul_result, mac_acc)
24016 && !reg_overlap_mentioned_p (mul_result, mac_op0)
24017 && !reg_overlap_mentioned_p (mul_result, mac_op1));
24018 }
24019
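/* Illustrative RTL shapes (not part of the original source): the predicate
   above returns nonzero for a pair such as

       producer:  (set (reg A) (mult (reg X) (reg Y)))
       consumer:  (set (reg D) (plus (mult (reg P) (reg Q)) (reg A)))

   i.e. A feeds only the accumulator operand of the MAC and neither
   multiplier operand.  */
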
24020
24021 /* The EABI says test the least significant bit of a guard variable. */
24022
24023 static bool
24024 arm_cxx_guard_mask_bit (void)
24025 {
24026 return TARGET_AAPCS_BASED;
24027 }
24028
24029
24030 /* The EABI specifies that all array cookies are 8 bytes long. */
24031
24032 static tree
24033 arm_get_cookie_size (tree type)
24034 {
24035 tree size;
24036
24037 if (!TARGET_AAPCS_BASED)
24038 return default_cxx_get_cookie_size (type);
24039
24040 size = build_int_cst (sizetype, 8);
24041 return size;
24042 }
24043
24044
24045 /* The EABI says that array cookies should also contain the element size. */
24046
24047 static bool
24048 arm_cookie_has_size (void)
24049 {
24050 return TARGET_AAPCS_BASED;
24051 }
24052
24053
24054 /* The EABI says constructors and destructors should return a pointer to
24055 the object constructed/destroyed. */
24056
24057 static bool
24058 arm_cxx_cdtor_returns_this (void)
24059 {
24060 return TARGET_AAPCS_BASED;
24061 }
24062
24063 /* The EABI says that an inline function may never be the key
24064 method. */
24065
24066 static bool
24067 arm_cxx_key_method_may_be_inline (void)
24068 {
24069 return !TARGET_AAPCS_BASED;
24070 }
24071
24072 static void
24073 arm_cxx_determine_class_data_visibility (tree decl)
24074 {
24075 if (!TARGET_AAPCS_BASED
24076 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
24077 return;
24078
24079 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
24080 is exported. However, on systems without dynamic vague linkage,
24081 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
24082 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
24083 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
24084 else
24085 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
24086 DECL_VISIBILITY_SPECIFIED (decl) = 1;
24087 }
24088
24089 static bool
24090 arm_cxx_class_data_always_comdat (void)
24091 {
24092 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
24093 vague linkage if the class has no key function. */
24094 return !TARGET_AAPCS_BASED;
24095 }
24096
24097
24098 /* The EABI says __aeabi_atexit should be used to register static
24099 destructors. */
24100
24101 static bool
24102 arm_cxx_use_aeabi_atexit (void)
24103 {
24104 return TARGET_AAPCS_BASED;
24105 }
24106
24107
24108 void
24109 arm_set_return_address (rtx source, rtx scratch)
24110 {
24111 arm_stack_offsets *offsets;
24112 HOST_WIDE_INT delta;
24113 rtx addr;
24114 unsigned long saved_regs;
24115
24116 offsets = arm_get_frame_offsets ();
24117 saved_regs = offsets->saved_regs_mask;
24118
24119 if ((saved_regs & (1 << LR_REGNUM)) == 0)
24120 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
24121 else
24122 {
24123 if (frame_pointer_needed)
24124 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
24125 else
24126 {
24127 /* LR will be the first saved register. */
24128 delta = offsets->outgoing_args - (offsets->frame + 4);
24129
24130
24131 if (delta >= 4096)
24132 {
24133 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
24134 GEN_INT (delta & ~4095)));
24135 addr = scratch;
24136 delta &= 4095;
24137 }
24138 else
24139 addr = stack_pointer_rtx;
24140
24141 addr = plus_constant (Pmode, addr, delta);
24142 }
24143 emit_move_insn (gen_frame_mem (Pmode, addr), source);
24144 }
24145 }
24146
24147
24148 void
24149 thumb_set_return_address (rtx source, rtx scratch)
24150 {
24151 arm_stack_offsets *offsets;
24152 HOST_WIDE_INT delta;
24153 HOST_WIDE_INT limit;
24154 int reg;
24155 rtx addr;
24156 unsigned long mask;
24157
24158 emit_use (source);
24159
24160 offsets = arm_get_frame_offsets ();
24161 mask = offsets->saved_regs_mask;
24162 if (mask & (1 << LR_REGNUM))
24163 {
24164 limit = 1024;
24165 /* Find the saved regs. */
24166 if (frame_pointer_needed)
24167 {
24168 delta = offsets->soft_frame - offsets->saved_args;
24169 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
24170 if (TARGET_THUMB1)
24171 limit = 128;
24172 }
24173 else
24174 {
24175 delta = offsets->outgoing_args - offsets->saved_args;
24176 reg = SP_REGNUM;
24177 }
24178 /* Allow for the stack frame. */
24179 if (TARGET_THUMB1 && TARGET_BACKTRACE)
24180 delta -= 16;
24181 /* The link register is always the first saved register. */
24182 delta -= 4;
24183
24184 /* Construct the address. */
24185 addr = gen_rtx_REG (SImode, reg);
24186 if (delta > limit)
24187 {
24188 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
24189 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
24190 addr = scratch;
24191 }
24192 else
24193 addr = plus_constant (Pmode, addr, delta);
24194
24195 emit_move_insn (gen_frame_mem (Pmode, addr), source);
24196 }
24197 else
24198 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
24199 }
24200
24201 /* Implements target hook vector_mode_supported_p. */
24202 bool
24203 arm_vector_mode_supported_p (enum machine_mode mode)
24204 {
24205 /* Neon also supports V2SImode, etc. listed in the clause below. */
24206 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
24207 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
24208 return true;
24209
24210 if ((TARGET_NEON || TARGET_IWMMXT)
24211 && ((mode == V2SImode)
24212 || (mode == V4HImode)
24213 || (mode == V8QImode)))
24214 return true;
24215
24216 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
24217 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
24218 || mode == V2HAmode))
24219 return true;
24220
24221 return false;
24222 }
24223
24224 /* Implements target hook array_mode_supported_p. */
24225
24226 static bool
24227 arm_array_mode_supported_p (enum machine_mode mode,
24228 unsigned HOST_WIDE_INT nelems)
24229 {
24230 if (TARGET_NEON
24231 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
24232 && (nelems >= 2 && nelems <= 4))
24233 return true;
24234
24235 return false;
24236 }
24237
24238 /* Use the option -mvectorize-with-neon-double to override the use of quadword
24239 registers when autovectorizing for Neon, at least until multiple vector
24240 widths are supported properly by the middle-end. */
24241
24242 static enum machine_mode
24243 arm_preferred_simd_mode (enum machine_mode mode)
24244 {
24245 if (TARGET_NEON)
24246 switch (mode)
24247 {
24248 case SFmode:
24249 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
24250 case SImode:
24251 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
24252 case HImode:
24253 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
24254 case QImode:
24255 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
24256 case DImode:
24257 if (!TARGET_NEON_VECTORIZE_DOUBLE)
24258 return V2DImode;
24259 break;
24260
24261 default:;
24262 }
24263
24264 if (TARGET_REALLY_IWMMXT)
24265 switch (mode)
24266 {
24267 case SImode:
24268 return V2SImode;
24269 case HImode:
24270 return V4HImode;
24271 case QImode:
24272 return V8QImode;
24273
24274 default:;
24275 }
24276
24277 return word_mode;
24278 }
24279
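/* Illustrative mapping (not part of the original source): with NEON enabled
   and the default quadword preference, SImode vectorizes as V4SImode, HImode
   as V8HImode, QImode as V16QImode and SFmode as V4SFmode; with
   -mvectorize-with-neon-double the doubleword modes (V2SImode, V4HImode,
   V8QImode, V2SFmode) are chosen instead.  */
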
24280 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
24281
24282 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
24283 using r0-r4 for function arguments, r7 for the stack frame and don't have
24284 enough left over to do doubleword arithmetic. For Thumb-2 all the
24285 potentially problematic instructions accept high registers so this is not
24286 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
24287 that require many low registers. */
24288 static bool
24289 arm_class_likely_spilled_p (reg_class_t rclass)
24290 {
24291 if ((TARGET_THUMB1 && rclass == LO_REGS)
24292 || rclass == CC_REG)
24293 return true;
24294
24295 return false;
24296 }
24297
24298 /* Implements target hook small_register_classes_for_mode_p. */
24299 bool
24300 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
24301 {
24302 return TARGET_THUMB1;
24303 }
24304
24305 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
24306 ARM insns and therefore guarantee that the shift count is modulo 256.
24307 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
24308 guarantee no particular behavior for out-of-range counts. */
24309
24310 static unsigned HOST_WIDE_INT
24311 arm_shift_truncation_mask (enum machine_mode mode)
24312 {
24313 return mode == SImode ? 255 : 0;
24314 }
24315
24316
24317 /* Map internal gcc register numbers to DWARF2 register numbers. */
24318
24319 unsigned int
24320 arm_dbx_register_number (unsigned int regno)
24321 {
24322 if (regno < 16)
24323 return regno;
24324
24325 if (IS_VFP_REGNUM (regno))
24326 {
24327 /* See comment in arm_dwarf_register_span. */
24328 if (VFP_REGNO_OK_FOR_SINGLE (regno))
24329 return 64 + regno - FIRST_VFP_REGNUM;
24330 else
24331 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
24332 }
24333
24334 if (IS_IWMMXT_GR_REGNUM (regno))
24335 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
24336
24337 if (IS_IWMMXT_REGNUM (regno))
24338 return 112 + regno - FIRST_IWMMXT_REGNUM;
24339
24340 gcc_unreachable ();
24341 }
24342
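/* Illustrative mapping (not part of the original source), assuming the usual
   ARM register numbering: r0-r15 map to DWARF numbers 0-15, the single
   precision VFP registers s0-s31 map to 64-95, and the upper double
   registers d16-d31 (which have no single-precision view) map to 272-287,
   i.e. 256 + 16 .. 256 + 31.  */
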
24343 /* Dwarf models VFPv3 registers as 32 64-bit registers.
24344 GCC models them as 64 32-bit registers, so we need to describe this to
24345 the DWARF generation code. Other registers can use the default. */
24346 static rtx
24347 arm_dwarf_register_span (rtx rtl)
24348 {
24349 unsigned regno;
24350 int nregs;
24351 int i;
24352 rtx p;
24353
24354 regno = REGNO (rtl);
24355 if (!IS_VFP_REGNUM (regno))
24356 return NULL_RTX;
24357
24358 /* XXX FIXME: The EABI defines two VFP register ranges:
24359 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
24360 256-287: D0-D31
24361 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
24362 corresponding D register. Until GDB supports this, we shall use the
24363 legacy encodings. We also use these encodings for D0-D15 for
24364 compatibility with older debuggers. */
24365 if (VFP_REGNO_OK_FOR_SINGLE (regno))
24366 return NULL_RTX;
24367
24368 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
24369 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
24370 regno = (regno - FIRST_VFP_REGNUM) / 2;
24371 for (i = 0; i < nregs; i++)
24372 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
24373
24374 return p;
24375 }
24376
24377 #if ARM_UNWIND_INFO
24378 /* Emit unwind directives for a store-multiple instruction or stack pointer
24379 push during alignment.
24380 These should only ever be generated by the function prologue code, so
24381 expect them to have a particular form. */
24382
24383 static void
24384 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
24385 {
24386 int i;
24387 HOST_WIDE_INT offset;
24388 HOST_WIDE_INT nregs;
24389 int reg_size;
24390 unsigned reg;
24391 unsigned lastreg;
24392 rtx e;
24393
24394 e = XVECEXP (p, 0, 0);
24395 if (GET_CODE (e) != SET)
24396 abort ();
24397
24398 /* First insn will adjust the stack pointer. */
24399 if (GET_CODE (e) != SET
24400 || !REG_P (XEXP (e, 0))
24401 || REGNO (XEXP (e, 0)) != SP_REGNUM
24402 || GET_CODE (XEXP (e, 1)) != PLUS)
24403 abort ();
24404
24405 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
24406 nregs = XVECLEN (p, 0) - 1;
24407
24408 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
24409 if (reg < 16)
24410 {
24411 /* The function prologue may also push pc, but not annotate it as it is
24412 never restored. We turn this into a stack pointer adjustment. */
24413 if (nregs * 4 == offset - 4)
24414 {
24415 fprintf (asm_out_file, "\t.pad #4\n");
24416 offset -= 4;
24417 }
24418 reg_size = 4;
24419 fprintf (asm_out_file, "\t.save {");
24420 }
24421 else if (IS_VFP_REGNUM (reg))
24422 {
24423 reg_size = 8;
24424 fprintf (asm_out_file, "\t.vsave {");
24425 }
24426 else
24427 /* Unknown register type. */
24428 abort ();
24429
24430 /* If the stack increment doesn't match the size of the saved registers,
24431 something has gone horribly wrong. */
24432 if (offset != nregs * reg_size)
24433 abort ();
24434
24435 offset = 0;
24436 lastreg = 0;
24437 /* The remaining insns will describe the stores. */
24438 for (i = 1; i <= nregs; i++)
24439 {
24440 /* Expect (set (mem <addr>) (reg)).
24441 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
24442 e = XVECEXP (p, 0, i);
24443 if (GET_CODE (e) != SET
24444 || !MEM_P (XEXP (e, 0))
24445 || !REG_P (XEXP (e, 1)))
24446 abort ();
24447
24448 reg = REGNO (XEXP (e, 1));
24449 if (reg < lastreg)
24450 abort ();
24451
24452 if (i != 1)
24453 fprintf (asm_out_file, ", ");
24454 /* We can't use %r for vfp because we need to use the
24455 double precision register names. */
24456 if (IS_VFP_REGNUM (reg))
24457 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
24458 else
24459 asm_fprintf (asm_out_file, "%r", reg);
24460
24461 #ifdef ENABLE_CHECKING
24462 /* Check that the addresses are consecutive. */
24463 e = XEXP (XEXP (e, 0), 0);
24464 if (GET_CODE (e) == PLUS)
24465 {
24466 offset += reg_size;
24467 if (!REG_P (XEXP (e, 0))
24468 || REGNO (XEXP (e, 0)) != SP_REGNUM
24469 || !CONST_INT_P (XEXP (e, 1))
24470 || offset != INTVAL (XEXP (e, 1)))
24471 abort ();
24472 }
24473 else if (i != 1
24474 || !REG_P (e)
24475 || REGNO (e) != SP_REGNUM)
24476 abort ();
24477 #endif
24478 }
24479 fprintf (asm_out_file, "}\n");
24480 }
24481
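/* Illustrative output (not part of the original source): a prologue
   store-multiple that pushes {r4, r5, lr} is annotated as

       .save {r4, r5, lr}

   a VFP store-multiple of d8-d9 produces

       .vsave {d8, d9}

   and a pushed pc that is never restored is folded into an extra ".pad #4".  */
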
24482 /* Emit unwind directives for a SET. */
24483
24484 static void
24485 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
24486 {
24487 rtx e0;
24488 rtx e1;
24489 unsigned reg;
24490
24491 e0 = XEXP (p, 0);
24492 e1 = XEXP (p, 1);
24493 switch (GET_CODE (e0))
24494 {
24495 case MEM:
24496 /* Pushing a single register. */
24497 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
24498 || !REG_P (XEXP (XEXP (e0, 0), 0))
24499 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
24500 abort ();
24501
24502 asm_fprintf (asm_out_file, "\t.save ");
24503 if (IS_VFP_REGNUM (REGNO (e1)))
24504 asm_fprintf(asm_out_file, "{d%d}\n",
24505 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
24506 else
24507 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
24508 break;
24509
24510 case REG:
24511 if (REGNO (e0) == SP_REGNUM)
24512 {
24513 /* A stack increment. */
24514 if (GET_CODE (e1) != PLUS
24515 || !REG_P (XEXP (e1, 0))
24516 || REGNO (XEXP (e1, 0)) != SP_REGNUM
24517 || !CONST_INT_P (XEXP (e1, 1)))
24518 abort ();
24519
24520 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
24521 -INTVAL (XEXP (e1, 1)));
24522 }
24523 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
24524 {
24525 HOST_WIDE_INT offset;
24526
24527 if (GET_CODE (e1) == PLUS)
24528 {
24529 if (!REG_P (XEXP (e1, 0))
24530 || !CONST_INT_P (XEXP (e1, 1)))
24531 abort ();
24532 reg = REGNO (XEXP (e1, 0));
24533 offset = INTVAL (XEXP (e1, 1));
24534 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
24535 HARD_FRAME_POINTER_REGNUM, reg,
24536 offset);
24537 }
24538 else if (REG_P (e1))
24539 {
24540 reg = REGNO (e1);
24541 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
24542 HARD_FRAME_POINTER_REGNUM, reg);
24543 }
24544 else
24545 abort ();
24546 }
24547 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
24548 {
24549 /* Move from sp to reg. */
24550 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
24551 }
24552 else if (GET_CODE (e1) == PLUS
24553 && REG_P (XEXP (e1, 0))
24554 && REGNO (XEXP (e1, 0)) == SP_REGNUM
24555 && CONST_INT_P (XEXP (e1, 1)))
24556 {
24557 /* Set reg to offset from sp. */
24558 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
24559 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
24560 }
24561 else
24562 abort ();
24563 break;
24564
24565 default:
24566 abort ();
24567 }
24568 }
24569
24570
24571 /* Emit unwind directives for the given insn. */
24572
24573 static void
24574 arm_unwind_emit (FILE * asm_out_file, rtx insn)
24575 {
24576 rtx note, pat;
24577 bool handled_one = false;
24578
24579 if (arm_except_unwind_info (&global_options) != UI_TARGET)
24580 return;
24581
24582 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
24583 && (TREE_NOTHROW (current_function_decl)
24584 || crtl->all_throwers_are_sibcalls))
24585 return;
24586
24587 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
24588 return;
24589
24590 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
24591 {
24592 pat = XEXP (note, 0);
24593 switch (REG_NOTE_KIND (note))
24594 {
24595 case REG_FRAME_RELATED_EXPR:
24596 goto found;
24597
24598 case REG_CFA_REGISTER:
24599 if (pat == NULL)
24600 {
24601 pat = PATTERN (insn);
24602 if (GET_CODE (pat) == PARALLEL)
24603 pat = XVECEXP (pat, 0, 0);
24604 }
24605
24606 /* Only emitted for IS_STACKALIGN re-alignment. */
24607 {
24608 rtx dest, src;
24609 unsigned reg;
24610
24611 src = SET_SRC (pat);
24612 dest = SET_DEST (pat);
24613
24614 gcc_assert (src == stack_pointer_rtx);
24615 reg = REGNO (dest);
24616 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
24617 reg + 0x90, reg);
24618 }
24619 handled_one = true;
24620 break;
24621
24622 case REG_CFA_DEF_CFA:
24623 case REG_CFA_EXPRESSION:
24624 case REG_CFA_ADJUST_CFA:
24625 case REG_CFA_OFFSET:
24626 /* ??? Only handling here what we actually emit. */
24627 gcc_unreachable ();
24628
24629 default:
24630 break;
24631 }
24632 }
24633 if (handled_one)
24634 return;
24635 pat = PATTERN (insn);
24636 found:
24637
24638 switch (GET_CODE (pat))
24639 {
24640 case SET:
24641 arm_unwind_emit_set (asm_out_file, pat);
24642 break;
24643
24644 case SEQUENCE:
24645 /* Store multiple. */
24646 arm_unwind_emit_sequence (asm_out_file, pat);
24647 break;
24648
24649 default:
24650 abort();
24651 }
24652 }
24653
24654
24655 /* Output a reference from a function exception table to the type_info
24656 object X. The EABI specifies that the symbol should be relocated by
24657 an R_ARM_TARGET2 relocation. */
24658
24659 static bool
24660 arm_output_ttype (rtx x)
24661 {
24662 fputs ("\t.word\t", asm_out_file);
24663 output_addr_const (asm_out_file, x);
24664 /* Use special relocations for symbol references. */
24665 if (!CONST_INT_P (x))
24666 fputs ("(TARGET2)", asm_out_file);
24667 fputc ('\n', asm_out_file);
24668
24669 return TRUE;
24670 }
24671
24672 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
24673
24674 static void
24675 arm_asm_emit_except_personality (rtx personality)
24676 {
24677 fputs ("\t.personality\t", asm_out_file);
24678 output_addr_const (asm_out_file, personality);
24679 fputc ('\n', asm_out_file);
24680 }
24681
24682 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
24683
24684 static void
24685 arm_asm_init_sections (void)
24686 {
24687 exception_section = get_unnamed_section (0, output_section_asm_op,
24688 "\t.handlerdata");
24689 }
24690 #endif /* ARM_UNWIND_INFO */
24691
24692 /* Output unwind directives for the start/end of a function. */
24693
24694 void
24695 arm_output_fn_unwind (FILE * f, bool prologue)
24696 {
24697 if (arm_except_unwind_info (&global_options) != UI_TARGET)
24698 return;
24699
24700 if (prologue)
24701 fputs ("\t.fnstart\n", f);
24702 else
24703 {
24704 /* If this function will never be unwound, then mark it as such.
24705 The same condition is used in arm_unwind_emit to suppress
24706 the frame annotations. */
24707 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
24708 && (TREE_NOTHROW (current_function_decl)
24709 || crtl->all_throwers_are_sibcalls))
24710 fputs("\t.cantunwind\n", f);
24711
24712 fputs ("\t.fnend\n", f);
24713 }
24714 }
24715
24716 static bool
24717 arm_emit_tls_decoration (FILE *fp, rtx x)
24718 {
24719 enum tls_reloc reloc;
24720 rtx val;
24721
24722 val = XVECEXP (x, 0, 0);
24723 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
24724
24725 output_addr_const (fp, val);
24726
24727 switch (reloc)
24728 {
24729 case TLS_GD32:
24730 fputs ("(tlsgd)", fp);
24731 break;
24732 case TLS_LDM32:
24733 fputs ("(tlsldm)", fp);
24734 break;
24735 case TLS_LDO32:
24736 fputs ("(tlsldo)", fp);
24737 break;
24738 case TLS_IE32:
24739 fputs ("(gottpoff)", fp);
24740 break;
24741 case TLS_LE32:
24742 fputs ("(tpoff)", fp);
24743 break;
24744 case TLS_DESCSEQ:
24745 fputs ("(tlsdesc)", fp);
24746 break;
24747 default:
24748 gcc_unreachable ();
24749 }
24750
24751 switch (reloc)
24752 {
24753 case TLS_GD32:
24754 case TLS_LDM32:
24755 case TLS_IE32:
24756 case TLS_DESCSEQ:
24757 fputs (" + (. - ", fp);
24758 output_addr_const (fp, XVECEXP (x, 0, 2));
24759 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
24760 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
24761 output_addr_const (fp, XVECEXP (x, 0, 3));
24762 fputc (')', fp);
24763 break;
24764 default:
24765 break;
24766 }
24767
24768 return TRUE;
24769 }
24770
24771 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
24772
24773 static void
24774 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
24775 {
24776 gcc_assert (size == 4);
24777 fputs ("\t.word\t", file);
24778 output_addr_const (file, x);
24779 fputs ("(tlsldo)", file);
24780 }
24781
24782 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
24783
24784 static bool
24785 arm_output_addr_const_extra (FILE *fp, rtx x)
24786 {
24787 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
24788 return arm_emit_tls_decoration (fp, x);
24789 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
24790 {
24791 char label[256];
24792 int labelno = INTVAL (XVECEXP (x, 0, 0));
24793
24794 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
24795 assemble_name_raw (fp, label);
24796
24797 return TRUE;
24798 }
24799 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
24800 {
24801 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
24802 if (GOT_PCREL)
24803 fputs ("+.", fp);
24804 fputs ("-(", fp);
24805 output_addr_const (fp, XVECEXP (x, 0, 0));
24806 fputc (')', fp);
24807 return TRUE;
24808 }
24809 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
24810 {
24811 output_addr_const (fp, XVECEXP (x, 0, 0));
24812 if (GOT_PCREL)
24813 fputs ("+.", fp);
24814 fputs ("-(", fp);
24815 output_addr_const (fp, XVECEXP (x, 0, 1));
24816 fputc (')', fp);
24817 return TRUE;
24818 }
24819 else if (GET_CODE (x) == CONST_VECTOR)
24820 return arm_emit_vector_const (fp, x);
24821
24822 return FALSE;
24823 }
24824
24825 /* Output assembly for a shift instruction.
24826 SET_FLAGS determines how the instruction modifies the condition codes.
24827 0 - Do not set condition codes.
24828 1 - Set condition codes.
24829 2 - Use smallest instruction. */
24830 const char *
24831 arm_output_shift(rtx * operands, int set_flags)
24832 {
24833 char pattern[100];
24834 static const char flag_chars[3] = {'?', '.', '!'};
24835 const char *shift;
24836 HOST_WIDE_INT val;
24837 char c;
24838
24839 c = flag_chars[set_flags];
24840 if (TARGET_UNIFIED_ASM)
24841 {
24842 shift = shift_op(operands[3], &val);
24843 if (shift)
24844 {
24845 if (val != -1)
24846 operands[2] = GEN_INT(val);
24847 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
24848 }
24849 else
24850 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
24851 }
24852 else
24853 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
24854 output_asm_insn (pattern, operands);
24855 return "";
24856 }
24857
24858 /* Output assembly for a WMMX immediate shift instruction. */
24859 const char *
24860 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
24861 {
24862 int shift = INTVAL (operands[2]);
24863 char templ[50];
24864 enum machine_mode opmode = GET_MODE (operands[0]);
24865
24866 gcc_assert (shift >= 0);
24867
24868 /* Handle the case where the shift value in the register versions is > 63
24869 (for the D qualifier), > 31 (for the W qualifier) or > 15 (for the H qualifier). */
24870 if (((opmode == V4HImode) && (shift > 15))
24871 || ((opmode == V2SImode) && (shift > 31))
24872 || ((opmode == DImode) && (shift > 63)))
24873 {
24874 if (wror_or_wsra)
24875 {
24876 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
24877 output_asm_insn (templ, operands);
24878 if (opmode == DImode)
24879 {
24880 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
24881 output_asm_insn (templ, operands);
24882 }
24883 }
24884 else
24885 {
24886 /* The destination register will contain all zeros. */
24887 sprintf (templ, "wzero\t%%0");
24888 output_asm_insn (templ, operands);
24889 }
24890 return "";
24891 }
24892
24893 if ((opmode == DImode) && (shift > 32))
24894 {
24895 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
24896 output_asm_insn (templ, operands);
24897 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
24898 output_asm_insn (templ, operands);
24899 }
24900 else
24901 {
24902 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
24903 output_asm_insn (templ, operands);
24904 }
24905 return "";
24906 }
24907
24908 /* Output assembly for a WMMX tinsr instruction. */
24909 const char *
24910 arm_output_iwmmxt_tinsr (rtx *operands)
24911 {
24912 int mask = INTVAL (operands[3]);
24913 int i;
24914 char templ[50];
24915 int units = mode_nunits[GET_MODE (operands[0])];
24916 gcc_assert ((mask & (mask - 1)) == 0);
24917 for (i = 0; i < units; ++i)
24918 {
24919 if ((mask & 0x01) == 1)
24920 {
24921 break;
24922 }
24923 mask >>= 1;
24924 }
24925 gcc_assert (i < units);
24926 {
24927 switch (GET_MODE (operands[0]))
24928 {
24929 case V8QImode:
24930 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
24931 break;
24932 case V4HImode:
24933 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
24934 break;
24935 case V2SImode:
24936 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
24937 break;
24938 default:
24939 gcc_unreachable ();
24940 break;
24941 }
24942 output_asm_insn (templ, operands);
24943 }
24944 return "";
24945 }
24946
24947 /* Output a Thumb-1 casesi dispatch sequence. */
24948 const char *
24949 thumb1_output_casesi (rtx *operands)
24950 {
24951 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
24952
24953 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24954
24955 switch (GET_MODE(diff_vec))
24956 {
24957 case QImode:
24958 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24959 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
24960 case HImode:
24961 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24962 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
24963 case SImode:
24964 return "bl\t%___gnu_thumb1_case_si";
24965 default:
24966 gcc_unreachable ();
24967 }
24968 }
24969
24970 /* Output a Thumb-2 casesi instruction. */
24971 const char *
24972 thumb2_output_casesi (rtx *operands)
24973 {
24974 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
24975
24976 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24977
24978 output_asm_insn ("cmp\t%0, %1", operands);
24979 output_asm_insn ("bhi\t%l3", operands);
24980 switch (GET_MODE(diff_vec))
24981 {
24982 case QImode:
24983 return "tbb\t[%|pc, %0]";
24984 case HImode:
24985 return "tbh\t[%|pc, %0, lsl #1]";
24986 case SImode:
24987 if (flag_pic)
24988 {
24989 output_asm_insn ("adr\t%4, %l2", operands);
24990 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
24991 output_asm_insn ("add\t%4, %4, %5", operands);
24992 return "bx\t%4";
24993 }
24994 else
24995 {
24996 output_asm_insn ("adr\t%4, %l2", operands);
24997 return "ldr\t%|pc, [%4, %0, lsl #2]";
24998 }
24999 default:
25000 gcc_unreachable ();
25001 }
25002 }
25003
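/* Illustrative emitted sequence (not part of the original source): for a
   HImode dispatch table the casesi insn expands to roughly

       cmp     r0, #<max index>
       bhi     .Ldefault
       tbh     [pc, r0, lsl #1]

   with the registers and labels taken from the actual operands.  */
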
25004 /* Most ARM cores are single issue, but some newer ones can dual issue.
25005 The scheduler descriptions rely on this being correct. */
25006 static int
25007 arm_issue_rate (void)
25008 {
25009 switch (arm_tune)
25010 {
25011 case cortexa15:
25012 return 3;
25013
25014 case cortexr4:
25015 case cortexr4f:
25016 case cortexr5:
25017 case genericv7a:
25018 case cortexa5:
25019 case cortexa8:
25020 case cortexa9:
25021 case fa726te:
25022 return 2;
25023
25024 default:
25025 return 1;
25026 }
25027 }
25028
25029 /* A table and a function to perform ARM-specific name mangling for
25030 NEON vector types in order to conform to the AAPCS (see "Procedure
25031 Call Standard for the ARM Architecture", Appendix A). To qualify
25032 for emission with the mangled names defined in that document, a
25033 vector type must not only be of the correct mode but also be
25034 composed of NEON vector element types (e.g. __builtin_neon_qi). */
25035 typedef struct
25036 {
25037 enum machine_mode mode;
25038 const char *element_type_name;
25039 const char *aapcs_name;
25040 } arm_mangle_map_entry;
25041
25042 static arm_mangle_map_entry arm_mangle_map[] = {
25043 /* 64-bit containerized types. */
25044 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
25045 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
25046 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
25047 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
25048 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
25049 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
25050 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
25051 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
25052 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
25053 /* 128-bit containerized types. */
25054 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
25055 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
25056 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
25057 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
25058 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
25059 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
25060 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
25061 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
25062 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
25063 { VOIDmode, NULL, NULL }
25064 };
25065
25066 const char *
25067 arm_mangle_type (const_tree type)
25068 {
25069 arm_mangle_map_entry *pos = arm_mangle_map;
25070
25071 /* The ARM ABI documents (10th October 2008) say that "__va_list"
25072 has to be mangled as if it is in the "std" namespace. */
25073 if (TARGET_AAPCS_BASED
25074 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
25075 {
25076 static bool warned;
25077 if (!warned && warn_psabi && !in_system_header)
25078 {
25079 warned = true;
25080 inform (input_location,
25081 "the mangling of %<va_list%> has changed in GCC 4.4");
25082 }
25083 return "St9__va_list";
25084 }
25085
25086 /* Half-precision float. */
25087 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
25088 return "Dh";
25089
25090 if (TREE_CODE (type) != VECTOR_TYPE)
25091 return NULL;
25092
25093 /* Check the mode of the vector type, and the name of the vector
25094 element type, against the table. */
25095 while (pos->mode != VOIDmode)
25096 {
25097 tree elt_type = TREE_TYPE (type);
25098
25099 if (pos->mode == TYPE_MODE (type)
25100 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
25101 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
25102 pos->element_type_name))
25103 return pos->aapcs_name;
25104
25105 pos++;
25106 }
25107
25108 /* Use the default mangling for unrecognized (possibly user-defined)
25109 vector types. */
25110 return NULL;
25111 }
25112
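/* Illustrative example (not part of the original source): a NEON vector of
   four 32-bit signed integers (mode V4SImode, element type
   __builtin_neon_si, i.e. int32x4_t from arm_neon.h) is mangled as
   "17__simd128_int32_t", so "void f (int32x4_t)" mangles to
   "_Z1f17__simd128_int32_t" under the table above.  */
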
25113 /* Order of allocation of core registers for Thumb: this allocation is
25114 written over the corresponding initial entries of the array
25115 initialized with REG_ALLOC_ORDER. We allocate all low registers
25116 first. Saving and restoring a low register is usually cheaper than
25117 using a call-clobbered high register. */
25118
25119 static const int thumb_core_reg_alloc_order[] =
25120 {
25121 3, 2, 1, 0, 4, 5, 6, 7,
25122 14, 12, 8, 9, 10, 11
25123 };
25124
25125 /* Adjust register allocation order when compiling for Thumb. */
25126
25127 void
25128 arm_order_regs_for_local_alloc (void)
25129 {
25130 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
25131 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
25132 if (TARGET_THUMB)
25133 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
25134 sizeof (thumb_core_reg_alloc_order));
25135 }
25136
25137 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
25138
25139 bool
25140 arm_frame_pointer_required (void)
25141 {
25142 return (cfun->has_nonlocal_label
25143 || SUBTARGET_FRAME_POINTER_REQUIRED
25144 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
25145 }
25146
25147 /* Only thumb1 can't support conditional execution, so return true if
25148 the target is not thumb1. */
25149 static bool
25150 arm_have_conditional_execution (void)
25151 {
25152 return !TARGET_THUMB1;
25153 }
25154
25155 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
25156 static HOST_WIDE_INT
25157 arm_vector_alignment (const_tree type)
25158 {
25159 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
25160
25161 if (TARGET_AAPCS_BASED)
25162 align = MIN (align, 64);
25163
25164 return align;
25165 }
25166
25167 static unsigned int
25168 arm_autovectorize_vector_sizes (void)
25169 {
25170 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
25171 }
25172
25173 static bool
25174 arm_vector_alignment_reachable (const_tree type, bool is_packed)
25175 {
25176 /* Vectors which aren't in packed structures will not be less aligned than
25177 the natural alignment of their element type, so this is safe. */
25178 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
25179 return !is_packed;
25180
25181 return default_builtin_vector_alignment_reachable (type, is_packed);
25182 }
25183
25184 static bool
25185 arm_builtin_support_vector_misalignment (enum machine_mode mode,
25186 const_tree type, int misalignment,
25187 bool is_packed)
25188 {
25189 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
25190 {
25191 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
25192
25193 if (is_packed)
25194 return align == 1;
25195
25196 /* If the misalignment is unknown, we should be able to handle the access
25197 so long as it is not to a member of a packed data structure. */
25198 if (misalignment == -1)
25199 return true;
25200
25201 /* Return true if the misalignment is a multiple of the natural alignment
25202 of the vector's element type. This is probably always going to be
25203 true in practice, since we've already established that this isn't a
25204 packed access. */
25205 return ((misalignment % align) == 0);
25206 }
25207
25208 return default_builtin_support_vector_misalignment (mode, type, misalignment,
25209 is_packed);
25210 }
25211
25212 static void
25213 arm_conditional_register_usage (void)
25214 {
25215 int regno;
25216
25217 if (TARGET_THUMB1 && optimize_size)
25218 {
25219 /* When optimizing for size on Thumb-1, it's better not
25220 to use the HI regs, because of the overhead of
25221 stacking them. */
25222 for (regno = FIRST_HI_REGNUM;
25223 regno <= LAST_HI_REGNUM; ++regno)
25224 fixed_regs[regno] = call_used_regs[regno] = 1;
25225 }
25226
25227 /* The link register can be clobbered by any branch insn,
25228 but we have no way to track that at present, so mark
25229 it as unavailable. */
25230 if (TARGET_THUMB1)
25231 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
25232
25233 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
25234 {
25235 /* VFPv3 registers are disabled when earlier VFP
25236 versions are selected due to the definition of
25237 LAST_VFP_REGNUM. */
25238 for (regno = FIRST_VFP_REGNUM;
25239 regno <= LAST_VFP_REGNUM; ++ regno)
25240 {
25241 fixed_regs[regno] = 0;
25242 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
25243 || regno >= FIRST_VFP_REGNUM + 32;
25244 }
25245 }
25246
25247 if (TARGET_REALLY_IWMMXT)
25248 {
25249 regno = FIRST_IWMMXT_GR_REGNUM;
25250 /* The 2002/10/09 revision of the XScale ABI has wCG0
25251 and wCG1 as call-preserved registers. The 2002/11/21
25252 revision changed this so that all wCG registers are
25253 scratch registers. */
25254 for (regno = FIRST_IWMMXT_GR_REGNUM;
25255 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
25256 fixed_regs[regno] = 0;
25257 /* The XScale ABI has wR0 - wR9 as scratch registers,
25258 the rest as call-preserved registers. */
25259 for (regno = FIRST_IWMMXT_REGNUM;
25260 regno <= LAST_IWMMXT_REGNUM; ++ regno)
25261 {
25262 fixed_regs[regno] = 0;
25263 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
25264 }
25265 }
25266
25267 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
25268 {
25269 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
25270 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
25271 }
25272 else if (TARGET_APCS_STACK)
25273 {
25274 fixed_regs[10] = 1;
25275 call_used_regs[10] = 1;
25276 }
25277 /* -mcaller-super-interworking reserves r11 for calls to
25278 _interwork_r11_call_via_rN(). Making the register global
25279 is an easy way of ensuring that it remains valid for all
25280 calls. */
25281 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
25282 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
25283 {
25284 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25285 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25286 if (TARGET_CALLER_INTERWORKING)
25287 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25288 }
25289 SUBTARGET_CONDITIONAL_REGISTER_USAGE
25290 }
25291
25292 static reg_class_t
25293 arm_preferred_rename_class (reg_class_t rclass)
25294 {
25295 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
25296 using GENERAL_REGS. During the register rename pass we therefore prefer
25297 LO_REGS, which can reduce code size. */
25298 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
25299 return LO_REGS;
25300 else
25301 return NO_REGS;
25302 }
25303
25304 /* Compute the attribute "length" of insn "*push_multi".
25305 So this function MUST be kept in sync with that insn pattern. */
25306 int
25307 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
25308 {
25309 int i, regno, hi_reg;
25310 int num_saves = XVECLEN (parallel_op, 0);
25311
25312 /* ARM mode. */
25313 if (TARGET_ARM)
25314 return 4;
25315 /* Thumb1 mode. */
25316 if (TARGET_THUMB1)
25317 return 2;
25318
25319 /* Thumb2 mode. */
25320 regno = REGNO (first_op);
25321 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
25322 for (i = 1; i < num_saves && !hi_reg; i++)
25323 {
25324 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
25325 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
25326 }
25327
25328 if (!hi_reg)
25329 return 2;
25330 return 4;
25331 }
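/* Editorial example, not part of GCC: the length computed above matches
   the Thumb-2 encodings actually used.  Two hypothetical register lists:

     push {r4, r5, lr}   ->  only low regs / LR, 16-bit encoding, length 2
     push {r4, r8}       ->  r8 is a high reg,   32-bit encoding, length 4  */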
25332
25333 /* Compute the number of instructions emitted by output_move_double. */
25334 int
25335 arm_count_output_move_double_insns (rtx *operands)
25336 {
25337 int count;
25338 rtx ops[2];
25339 /* output_move_double may modify the operands array, so call it
25340 here on a copy of the array. */
25341 ops[0] = operands[0];
25342 ops[1] = operands[1];
25343 output_move_double (ops, false, &count);
25344 return count;
25345 }
25346
25347 int
25348 vfp3_const_double_for_fract_bits (rtx operand)
25349 {
25350 REAL_VALUE_TYPE r0;
25351
25352 if (!CONST_DOUBLE_P (operand))
25353 return 0;
25354
25355 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
25356 if (exact_real_inverse (DFmode, &r0))
25357 {
25358 if (exact_real_truncate (DFmode, &r0))
25359 {
25360 HOST_WIDE_INT value = real_to_integer (&r0);
25361 value = value & 0xffffffff;
25362 if ((value != 0) && ( (value & (value - 1)) == 0))
25363 return int_log2 (value);
25364 }
25365 }
25366 return 0;
25367 }
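/* Editorial worked example, not part of GCC: for the constant 0.03125
   (= 1/32) the exact inverse is 32.0, which truncates exactly to the
   integer 32; 32 is a power of two, so the function returns
   int_log2 (32) == 5, suitable as a #fbits operand for the VFPv3
   fixed-point conversion patterns.  A constant such as 0.3, whose
   reciprocal is not a power of two, fails the tests and yields 0.  */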
25368 \f
25369 /* Emit a memory barrier around an atomic sequence according to MODEL. */
25370
25371 static void
25372 arm_pre_atomic_barrier (enum memmodel model)
25373 {
25374 if (need_atomic_barrier_p (model, true))
25375 emit_insn (gen_memory_barrier ());
25376 }
25377
25378 static void
25379 arm_post_atomic_barrier (enum memmodel model)
25380 {
25381 if (need_atomic_barrier_p (model, false))
25382 emit_insn (gen_memory_barrier ());
25383 }
25384
25385 /* Emit the load-exclusive and store-exclusive instructions. */
25386
25387 static void
25388 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem)
25389 {
25390 rtx (*gen) (rtx, rtx);
25391
25392 switch (mode)
25393 {
25394 case QImode: gen = gen_arm_load_exclusiveqi; break;
25395 case HImode: gen = gen_arm_load_exclusivehi; break;
25396 case SImode: gen = gen_arm_load_exclusivesi; break;
25397 case DImode: gen = gen_arm_load_exclusivedi; break;
25398 default:
25399 gcc_unreachable ();
25400 }
25401
25402 emit_insn (gen (rval, mem));
25403 }
25404
25405 static void
25406 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem)
25407 {
25408 rtx (*gen) (rtx, rtx, rtx);
25409
25410 switch (mode)
25411 {
25412 case QImode: gen = gen_arm_store_exclusiveqi; break;
25413 case HImode: gen = gen_arm_store_exclusivehi; break;
25414 case SImode: gen = gen_arm_store_exclusivesi; break;
25415 case DImode: gen = gen_arm_store_exclusivedi; break;
25416 default:
25417 gcc_unreachable ();
25418 }
25419
25420 emit_insn (gen (bval, rval, mem));
25421 }
25422
25423 /* Mark the previous jump instruction as unlikely. */
25424
25425 static void
25426 emit_unlikely_jump (rtx insn)
25427 {
25428 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
25429
25430 insn = emit_jump_insn (insn);
25431 add_reg_note (insn, REG_BR_PROB, very_unlikely);
25432 }
25433
25434 /* Expand a compare and swap pattern. */
25435
25436 void
25437 arm_expand_compare_and_swap (rtx operands[])
25438 {
25439 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
25440 enum machine_mode mode;
25441 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
25442
25443 bval = operands[0];
25444 rval = operands[1];
25445 mem = operands[2];
25446 oldval = operands[3];
25447 newval = operands[4];
25448 is_weak = operands[5];
25449 mod_s = operands[6];
25450 mod_f = operands[7];
25451 mode = GET_MODE (mem);
25452
25453 switch (mode)
25454 {
25455 case QImode:
25456 case HImode:
25457 /* For narrow modes, we're going to perform the comparison in SImode,
25458 so do the zero-extension now. */
25459 rval = gen_reg_rtx (SImode);
25460 oldval = convert_modes (SImode, mode, oldval, true);
25461 /* FALLTHRU */
25462
25463 case SImode:
25464 /* Force the value into a register if needed. We waited until after
25465 the zero-extension above to do this properly. */
25466 if (!arm_add_operand (oldval, mode))
25467 oldval = force_reg (mode, oldval);
25468 break;
25469
25470 case DImode:
25471 if (!cmpdi_operand (oldval, mode))
25472 oldval = force_reg (mode, oldval);
25473 break;
25474
25475 default:
25476 gcc_unreachable ();
25477 }
25478
25479 switch (mode)
25480 {
25481 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
25482 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
25483 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
25484 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
25485 default:
25486 gcc_unreachable ();
25487 }
25488
25489 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
25490
25491 if (mode == QImode || mode == HImode)
25492 emit_move_insn (operands[1], gen_lowpart (mode, rval));
25493
25494 /* In all cases, we arrange for success to be signaled by Z set.
25495 This arrangement allows for the boolean result to be used directly
25496 in a subsequent branch, post optimization. */
25497 x = gen_rtx_REG (CCmode, CC_REGNUM);
25498 x = gen_rtx_EQ (SImode, x, const0_rtx);
25499 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
25500 }
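/* Editorial example, not part of GCC: a hypothetical source construct
   whose expansion reaches the routine above via the
   atomic_compare_and_swap optab; the split below then turns it into a
   LDREX/STREX loop on ARMv7:

     int expected = 0;
     __atomic_compare_exchange_n (&lock, &expected, 1, 0,
                                  __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);

   The boolean result comes straight from the Z flag arranged above.  */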
25501
25502 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
25503 another memory store between the load-exclusive and store-exclusive can
25504 reset the monitor from Exclusive to Open state. This means we must wait
25505 until after reload to split the pattern, lest we get a register spill in
25506 the middle of the atomic sequence. */
25507
25508 void
25509 arm_split_compare_and_swap (rtx operands[])
25510 {
25511 rtx rval, mem, oldval, newval, scratch;
25512 enum machine_mode mode;
25513 enum memmodel mod_s, mod_f;
25514 bool is_weak;
25515 rtx label1, label2, x, cond;
25516
25517 rval = operands[0];
25518 mem = operands[1];
25519 oldval = operands[2];
25520 newval = operands[3];
25521 is_weak = (operands[4] != const0_rtx);
25522 mod_s = (enum memmodel) INTVAL (operands[5]);
25523 mod_f = (enum memmodel) INTVAL (operands[6]);
25524 scratch = operands[7];
25525 mode = GET_MODE (mem);
25526
25527 arm_pre_atomic_barrier (mod_s);
25528
25529 label1 = NULL_RTX;
25530 if (!is_weak)
25531 {
25532 label1 = gen_label_rtx ();
25533 emit_label (label1);
25534 }
25535 label2 = gen_label_rtx ();
25536
25537 arm_emit_load_exclusive (mode, rval, mem);
25538
25539 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
25540 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25541 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
25542 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
25543 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
25544
25545 arm_emit_store_exclusive (mode, scratch, mem, newval);
25546
25547 /* Weak or strong, we want EQ to be true for success, so that we
25548 match the flags that we got from the compare above. */
25549 cond = gen_rtx_REG (CCmode, CC_REGNUM);
25550 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
25551 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
25552
25553 if (!is_weak)
25554 {
25555 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25556 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
25557 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
25558 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
25559 }
25560
25561 if (mod_f != MEMMODEL_RELAXED)
25562 emit_label (label2);
25563
25564 arm_post_atomic_barrier (mod_s);
25565
25566 if (mod_f == MEMMODEL_RELAXED)
25567 emit_label (label2);
25568 }
25569
25570 void
25571 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
25572 rtx value, rtx model_rtx, rtx cond)
25573 {
25574 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
25575 enum machine_mode mode = GET_MODE (mem);
25576 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
25577 rtx label, x;
25578
25579 arm_pre_atomic_barrier (model);
25580
25581 label = gen_label_rtx ();
25582 emit_label (label);
25583
25584 if (new_out)
25585 new_out = gen_lowpart (wmode, new_out);
25586 if (old_out)
25587 old_out = gen_lowpart (wmode, old_out);
25588 else
25589 old_out = new_out;
25590 value = simplify_gen_subreg (wmode, value, mode, 0);
25591
25592 arm_emit_load_exclusive (mode, old_out, mem);
25593
25594 switch (code)
25595 {
25596 case SET:
25597 new_out = value;
25598 break;
25599
25600 case NOT:
25601 x = gen_rtx_AND (wmode, old_out, value);
25602 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25603 x = gen_rtx_NOT (wmode, new_out);
25604 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25605 break;
25606
25607 case MINUS:
25608 if (CONST_INT_P (value))
25609 {
25610 value = GEN_INT (-INTVAL (value));
25611 code = PLUS;
25612 }
25613 /* FALLTHRU */
25614
25615 case PLUS:
25616 if (mode == DImode)
25617 {
25618 /* DImode plus/minus need to clobber flags. */
25619 /* The adddi3 and subdi3 patterns are incorrectly written so that
25620 they require matching operands, even when we could easily support
25621 three operands. Thankfully, this can be fixed up post-splitting,
25622 as the individual add+adc patterns do accept three operands and
25623 post-reload cprop can make these moves go away. */
25624 emit_move_insn (new_out, old_out);
25625 if (code == PLUS)
25626 x = gen_adddi3 (new_out, new_out, value);
25627 else
25628 x = gen_subdi3 (new_out, new_out, value);
25629 emit_insn (x);
25630 break;
25631 }
25632 /* FALLTHRU */
25633
25634 default:
25635 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
25636 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25637 break;
25638 }
25639
25640 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out));
25641
25642 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25643 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
25644
25645 arm_post_atomic_barrier (model);
25646 }
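/* Editorial note, not part of GCC: two hypothetical examples of how the
   cases above are reached.  __atomic_fetch_nand (&x, m, __ATOMIC_SEQ_CST)
   maps to the NOT case, which computes new_out = ~(old_out & value),
   while __atomic_fetch_sub (&x, 4, ...) has its constant negated so the
   PLUS path emits a single add-based loop for both add and subtract.  */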
25647 \f
25648 #define MAX_VECT_LEN 16
25649
25650 struct expand_vec_perm_d
25651 {
25652 rtx target, op0, op1;
25653 unsigned char perm[MAX_VECT_LEN];
25654 enum machine_mode vmode;
25655 unsigned char nelt;
25656 bool one_vector_p;
25657 bool testing_p;
25658 };
25659
25660 /* Generate a variable permutation. */
25661
25662 static void
25663 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
25664 {
25665 enum machine_mode vmode = GET_MODE (target);
25666 bool one_vector_p = rtx_equal_p (op0, op1);
25667
25668 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
25669 gcc_checking_assert (GET_MODE (op0) == vmode);
25670 gcc_checking_assert (GET_MODE (op1) == vmode);
25671 gcc_checking_assert (GET_MODE (sel) == vmode);
25672 gcc_checking_assert (TARGET_NEON);
25673
25674 if (one_vector_p)
25675 {
25676 if (vmode == V8QImode)
25677 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
25678 else
25679 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
25680 }
25681 else
25682 {
25683 rtx pair;
25684
25685 if (vmode == V8QImode)
25686 {
25687 pair = gen_reg_rtx (V16QImode);
25688 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
25689 pair = gen_lowpart (TImode, pair);
25690 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
25691 }
25692 else
25693 {
25694 pair = gen_reg_rtx (OImode);
25695 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
25696 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
25697 }
25698 }
25699 }
25700
25701 void
25702 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
25703 {
25704 enum machine_mode vmode = GET_MODE (target);
25705 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
25706 bool one_vector_p = rtx_equal_p (op0, op1);
25707 rtx rmask[MAX_VECT_LEN], mask;
25708
25709 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
25710 numbering of elements for big-endian, we must reverse the order. */
25711 gcc_checking_assert (!BYTES_BIG_ENDIAN);
25712
25713 /* The VTBL instruction does not use a modulo index, so we must take care
25714 of that ourselves. */
25715 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
25716 for (i = 0; i < nelt; ++i)
25717 rmask[i] = mask;
25718 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
25719 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
25720
25721 arm_expand_vec_perm_1 (target, op0, op1, sel);
25722 }
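/* Editorial example, not part of GCC: with two V8QI operands the selector
   is reduced modulo 16 before the table lookup, so a hypothetical
   out-of-range index such as 19 selects the same element as index 3:

     sel = { 19, 1, 2, ... }  ->  sel & 15 = { 3, 1, 2, ... }

   which gives the modulo semantics VEC_PERM_EXPR requires but VTBL does
   not provide.  */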
25723
25724 /* Generate or test for an insn that supports a constant permutation. */
25725
25726 /* Recognize patterns for the VUZP insns. */
25727
25728 static bool
25729 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
25730 {
25731 unsigned int i, odd, mask, nelt = d->nelt;
25732 rtx out0, out1, in0, in1, x;
25733 rtx (*gen)(rtx, rtx, rtx, rtx);
25734
25735 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25736 return false;
25737
25738 /* Note that these are little-endian tests. Adjust for big-endian later. */
25739 if (d->perm[0] == 0)
25740 odd = 0;
25741 else if (d->perm[0] == 1)
25742 odd = 1;
25743 else
25744 return false;
25745 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25746
25747 for (i = 0; i < nelt; i++)
25748 {
25749 unsigned elt = (i * 2 + odd) & mask;
25750 if (d->perm[i] != elt)
25751 return false;
25752 }
25753
25754 /* Success! */
25755 if (d->testing_p)
25756 return true;
25757
25758 switch (d->vmode)
25759 {
25760 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
25761 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
25762 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
25763 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
25764 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
25765 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
25766 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
25767 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
25768 default:
25769 gcc_unreachable ();
25770 }
25771
25772 in0 = d->op0;
25773 in1 = d->op1;
25774 if (BYTES_BIG_ENDIAN)
25775 {
25776 x = in0, in0 = in1, in1 = x;
25777 odd = !odd;
25778 }
25779
25780 out0 = d->target;
25781 out1 = gen_reg_rtx (d->vmode);
25782 if (odd)
25783 x = out0, out0 = out1, out1 = x;
25784
25785 emit_insn (gen (out0, in0, in1, out1));
25786 return true;
25787 }
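/* Editorial example, not part of GCC: a hypothetical permutation matched
   by the function above.  For two uint8x8_t operands (arm_neon.h types),

     __builtin_shuffle (a, b, (uint8x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 })

   selects the even-indexed elements of the concatenation and is emitted
   as a single VUZP.8, with the unwanted odd-element output discarded.  */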
25788
25789 /* Recognize patterns for the VZIP insns. */
25790
25791 static bool
25792 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
25793 {
25794 unsigned int i, high, mask, nelt = d->nelt;
25795 rtx out0, out1, in0, in1, x;
25796 rtx (*gen)(rtx, rtx, rtx, rtx);
25797
25798 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25799 return false;
25800
25801 /* Note that these are little-endian tests. Adjust for big-endian later. */
25802 high = nelt / 2;
25803 if (d->perm[0] == high)
25804 ;
25805 else if (d->perm[0] == 0)
25806 high = 0;
25807 else
25808 return false;
25809 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25810
25811 for (i = 0; i < nelt / 2; i++)
25812 {
25813 unsigned elt = (i + high) & mask;
25814 if (d->perm[i * 2] != elt)
25815 return false;
25816 elt = (elt + nelt) & mask;
25817 if (d->perm[i * 2 + 1] != elt)
25818 return false;
25819 }
25820
25821 /* Success! */
25822 if (d->testing_p)
25823 return true;
25824
25825 switch (d->vmode)
25826 {
25827 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
25828 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
25829 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
25830 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
25831 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
25832 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
25833 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
25834 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
25835 default:
25836 gcc_unreachable ();
25837 }
25838
25839 in0 = d->op0;
25840 in1 = d->op1;
25841 if (BYTES_BIG_ENDIAN)
25842 {
25843 x = in0, in0 = in1, in1 = x;
25844 high = !high;
25845 }
25846
25847 out0 = d->target;
25848 out1 = gen_reg_rtx (d->vmode);
25849 if (high)
25850 x = out0, out0 = out1, out1 = x;
25851
25852 emit_insn (gen (out0, in0, in1, out1));
25853 return true;
25854 }
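/* Editorial example, not part of GCC: a hypothetical permutation matched
   by the function above.  For two uint8x8_t operands,

     __builtin_shuffle (a, b, (uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 })

   interleaves the low halves of the two inputs and is emitted as a single
   VZIP.8.  */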
25855
25856 /* Recognize patterns for the VREV insns. */
25857
25858 static bool
25859 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
25860 {
25861 unsigned int i, j, diff, nelt = d->nelt;
25862 rtx (*gen)(rtx, rtx, rtx);
25863
25864 if (!d->one_vector_p)
25865 return false;
25866
25867 diff = d->perm[0];
25868 switch (diff)
25869 {
25870 case 7:
25871 switch (d->vmode)
25872 {
25873 case V16QImode: gen = gen_neon_vrev64v16qi; break;
25874 case V8QImode: gen = gen_neon_vrev64v8qi; break;
25875 default:
25876 return false;
25877 }
25878 break;
25879 case 3:
25880 switch (d->vmode)
25881 {
25882 case V16QImode: gen = gen_neon_vrev32v16qi; break;
25883 case V8QImode: gen = gen_neon_vrev32v8qi; break;
25884 case V8HImode: gen = gen_neon_vrev64v8hi; break;
25885 case V4HImode: gen = gen_neon_vrev64v4hi; break;
25886 default:
25887 return false;
25888 }
25889 break;
25890 case 1:
25891 switch (d->vmode)
25892 {
25893 case V16QImode: gen = gen_neon_vrev16v16qi; break;
25894 case V8QImode: gen = gen_neon_vrev16v8qi; break;
25895 case V8HImode: gen = gen_neon_vrev32v8hi; break;
25896 case V4HImode: gen = gen_neon_vrev32v4hi; break;
25897 case V4SImode: gen = gen_neon_vrev64v4si; break;
25898 case V2SImode: gen = gen_neon_vrev64v2si; break;
25899 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
25900 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
25901 default:
25902 return false;
25903 }
25904 break;
25905 default:
25906 return false;
25907 }
25908
25909 for (i = 0; i < nelt ; i += diff + 1)
25910 for (j = 0; j <= diff; j += 1)
25911 {
25912 /* This is guaranteed to be true as the value of diff
25913 is 7, 3, 1 and we should have enough elements in the
25914 queue to generate this. Getting a vector mask with a
25915 value of diff other than these values implies that
25916 something is wrong by the time we get here. */
25917 gcc_assert (i + j < nelt);
25918 if (d->perm[i + j] != i + diff - j)
25919 return false;
25920 }
25921
25922 /* Success! */
25923 if (d->testing_p)
25924 return true;
25925
25926 /* ??? The third operand is an artifact of the builtin infrastructure
25927 and is ignored by the actual instruction. */
25928 emit_insn (gen (d->target, d->op0, const0_rtx));
25929 return true;
25930 }
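/* Editorial example, not part of GCC: a hypothetical single-operand
   permutation matched by the function above.  For one uint8x8_t operand,

     __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 })

   has diff == 1 and swaps the bytes within each 16-bit lane, so it is
   emitted as VREV16.8.  */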
25931
25932 /* Recognize patterns for the VTRN insns. */
25933
25934 static bool
25935 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
25936 {
25937 unsigned int i, odd, mask, nelt = d->nelt;
25938 rtx out0, out1, in0, in1, x;
25939 rtx (*gen)(rtx, rtx, rtx, rtx);
25940
25941 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25942 return false;
25943
25944 /* Note that these are little-endian tests. Adjust for big-endian later. */
25945 if (d->perm[0] == 0)
25946 odd = 0;
25947 else if (d->perm[0] == 1)
25948 odd = 1;
25949 else
25950 return false;
25951 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25952
25953 for (i = 0; i < nelt; i += 2)
25954 {
25955 if (d->perm[i] != i + odd)
25956 return false;
25957 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
25958 return false;
25959 }
25960
25961 /* Success! */
25962 if (d->testing_p)
25963 return true;
25964
25965 switch (d->vmode)
25966 {
25967 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
25968 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
25969 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
25970 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
25971 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
25972 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
25973 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
25974 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
25975 default:
25976 gcc_unreachable ();
25977 }
25978
25979 in0 = d->op0;
25980 in1 = d->op1;
25981 if (BYTES_BIG_ENDIAN)
25982 {
25983 x = in0, in0 = in1, in1 = x;
25984 odd = !odd;
25985 }
25986
25987 out0 = d->target;
25988 out1 = gen_reg_rtx (d->vmode);
25989 if (odd)
25990 x = out0, out0 = out1, out1 = x;
25991
25992 emit_insn (gen (out0, in0, in1, out1));
25993 return true;
25994 }
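/* Editorial example, not part of GCC: a hypothetical permutation matched
   by the function above.  For two uint8x8_t operands,

     __builtin_shuffle (a, b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 })

   interleaves the even-indexed lanes of the two inputs and is emitted as
   a single VTRN.8.  */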
25995
25996 /* Recognize patterns for the VEXT insns. */
25997
25998 static bool
25999 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
26000 {
26001 unsigned int i, nelt = d->nelt;
26002 rtx (*gen) (rtx, rtx, rtx, rtx);
26003 rtx offset;
26004
26005 unsigned int location;
26006
26007 unsigned int next = d->perm[0] + 1;
26008
26009 /* TODO: Handle GCC's numbering of elements for big-endian. */
26010 if (BYTES_BIG_ENDIAN)
26011 return false;
26012
26013 /* Check if the extracted indexes are increasing by one. */
26014 for (i = 1; i < nelt; next++, i++)
26015 {
26016 /* If we hit the most significant element of the 2nd vector in
26017 the previous iteration, no need to test further. */
26018 if (next == 2 * nelt)
26019 return false;
26020
26021 /* If we are operating on only one vector, it could be a
26022 rotation. If there are only two elements of size < 64, let
26023 arm_evpc_neon_vrev catch it. */
26024 if (d->one_vector_p && (next == nelt))
26025 {
26026 if ((nelt == 2) && (d->vmode != V2DImode))
26027 return false;
26028 else
26029 next = 0;
26030 }
26031
26032 if (d->perm[i] != next)
26033 return false;
26034 }
26035
26036 location = d->perm[0];
26037
26038 switch (d->vmode)
26039 {
26040 case V16QImode: gen = gen_neon_vextv16qi; break;
26041 case V8QImode: gen = gen_neon_vextv8qi; break;
26042 case V4HImode: gen = gen_neon_vextv4hi; break;
26043 case V8HImode: gen = gen_neon_vextv8hi; break;
26044 case V2SImode: gen = gen_neon_vextv2si; break;
26045 case V4SImode: gen = gen_neon_vextv4si; break;
26046 case V2SFmode: gen = gen_neon_vextv2sf; break;
26047 case V4SFmode: gen = gen_neon_vextv4sf; break;
26048 case V2DImode: gen = gen_neon_vextv2di; break;
26049 default:
26050 return false;
26051 }
26052
26053 /* Success! */
26054 if (d->testing_p)
26055 return true;
26056
26057 offset = GEN_INT (location);
26058 emit_insn (gen (d->target, d->op0, d->op1, offset));
26059 return true;
26060 }
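/* Editorial example, not part of GCC: a hypothetical permutation matched
   by the function above.  For two uint32x4_t operands,

     __builtin_shuffle (a, b, (uint32x4_t) { 1, 2, 3, 4 })

   extracts four consecutive elements starting at index 1 of the
   concatenation {a, b} and is emitted as VEXT.32 with an offset of 1.  */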
26061
26062 /* The NEON VTBL instruction is a fully variable permutation that's even
26063 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
26064 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
26065 can do slightly better by expanding this as a constant where we don't
26066 have to apply a mask. */
26067
26068 static bool
26069 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
26070 {
26071 rtx rperm[MAX_VECT_LEN], sel;
26072 enum machine_mode vmode = d->vmode;
26073 unsigned int i, nelt = d->nelt;
26074
26075 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
26076 numbering of elements for big-endian, we must reverse the order. */
26077 if (BYTES_BIG_ENDIAN)
26078 return false;
26079
26080 if (d->testing_p)
26081 return true;
26082
26083 /* Generic code will try constant permutation twice: once with the
26084 original mode and again with the elements lowered to QImode.
26085 So wait and don't do the selector expansion ourselves. */
26086 if (vmode != V8QImode && vmode != V16QImode)
26087 return false;
26088
26089 for (i = 0; i < nelt; ++i)
26090 rperm[i] = GEN_INT (d->perm[i]);
26091 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
26092 sel = force_reg (vmode, sel);
26093
26094 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
26095 return true;
26096 }
26097
26098 static bool
26099 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
26100 {
26101 /* Check if the input mask matches vext before reordering the
26102 operands. */
26103 if (TARGET_NEON)
26104 if (arm_evpc_neon_vext (d))
26105 return true;
26106
26107 /* The pattern matching functions above are written to look for a small
26108 number to begin the sequence (0, 1, N/2). If we begin with an index
26109 from the second operand, we can swap the operands. */
26110 if (d->perm[0] >= d->nelt)
26111 {
26112 unsigned i, nelt = d->nelt;
26113 rtx x;
26114
26115 for (i = 0; i < nelt; ++i)
26116 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
26117
26118 x = d->op0;
26119 d->op0 = d->op1;
26120 d->op1 = x;
26121 }
26122
26123 if (TARGET_NEON)
26124 {
26125 if (arm_evpc_neon_vuzp (d))
26126 return true;
26127 if (arm_evpc_neon_vzip (d))
26128 return true;
26129 if (arm_evpc_neon_vrev (d))
26130 return true;
26131 if (arm_evpc_neon_vtrn (d))
26132 return true;
26133 return arm_evpc_neon_vtbl (d);
26134 }
26135 return false;
26136 }
26137
26138 /* Expand a vec_perm_const pattern. */
26139
26140 bool
26141 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
26142 {
26143 struct expand_vec_perm_d d;
26144 int i, nelt, which;
26145
26146 d.target = target;
26147 d.op0 = op0;
26148 d.op1 = op1;
26149
26150 d.vmode = GET_MODE (target);
26151 gcc_assert (VECTOR_MODE_P (d.vmode));
26152 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
26153 d.testing_p = false;
26154
26155 for (i = which = 0; i < nelt; ++i)
26156 {
26157 rtx e = XVECEXP (sel, 0, i);
26158 int ei = INTVAL (e) & (2 * nelt - 1);
26159 which |= (ei < nelt ? 1 : 2);
26160 d.perm[i] = ei;
26161 }
26162
26163 switch (which)
26164 {
26165 default:
26166 gcc_unreachable ();
26167
26168 case 3:
26169 d.one_vector_p = false;
26170 if (!rtx_equal_p (op0, op1))
26171 break;
26172
26173 /* The elements of PERM do not suggest that only the first operand
26174 is used, but both operands are identical. Allow easier matching
26175 of the permutation by folding the permutation into the single
26176 input vector. */
26177 /* FALLTHRU */
26178 case 2:
26179 for (i = 0; i < nelt; ++i)
26180 d.perm[i] &= nelt - 1;
26181 d.op0 = op1;
26182 d.one_vector_p = true;
26183 break;
26184
26185 case 1:
26186 d.op1 = op0;
26187 d.one_vector_p = true;
26188 break;
26189 }
26190
26191 return arm_expand_vec_perm_const_1 (&d);
26192 }
26193
26194 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
26195
26196 static bool
26197 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
26198 const unsigned char *sel)
26199 {
26200 struct expand_vec_perm_d d;
26201 unsigned int i, nelt, which;
26202 bool ret;
26203
26204 d.vmode = vmode;
26205 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
26206 d.testing_p = true;
26207 memcpy (d.perm, sel, nelt);
26208
26209 /* Categorize the set of elements in the selector. */
26210 for (i = which = 0; i < nelt; ++i)
26211 {
26212 unsigned char e = d.perm[i];
26213 gcc_assert (e < 2 * nelt);
26214 which |= (e < nelt ? 1 : 2);
26215 }
26216
26217 /* For all elements from second vector, fold the elements to first. */
26218 if (which == 2)
26219 for (i = 0; i < nelt; ++i)
26220 d.perm[i] -= nelt;
26221
26222 /* Check whether the mask can be applied to the vector type. */
26223 d.one_vector_p = (which != 3);
26224
26225 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
26226 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
26227 if (!d.one_vector_p)
26228 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
26229
26230 start_sequence ();
26231 ret = arm_expand_vec_perm_const_1 (&d);
26232 end_sequence ();
26233
26234 return ret;
26235 }
26236
26237 bool
26238 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
26239 {
26240 /* If we are soft float and either have LDRD or the mode is no wider
26241 than a word, then all auto increment forms are ok. */
26242 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
26243 return true;
26244
26245 switch (code)
26246 {
26247 /* Post increment and Pre Decrement are supported for all
26248 instruction forms except for vector forms. */
26249 case ARM_POST_INC:
26250 case ARM_PRE_DEC:
26251 if (VECTOR_MODE_P (mode))
26252 {
26253 if (code != ARM_PRE_DEC)
26254 return true;
26255 else
26256 return false;
26257 }
26258
26259 return true;
26260
26261 case ARM_POST_DEC:
26262 case ARM_PRE_INC:
26263 /* Without LDRD and mode size greater than
26264 word size, there is no point in auto-incrementing
26265 because ldm and stm will not have these forms. */
26266 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
26267 return false;
26268
26269 /* Vector and floating point modes do not support
26270 these auto increment forms. */
26271 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
26272 return false;
26273
26274 return true;
26275
26276 default:
26277 return false;
26278
26279 }
26280
26281 return false;
26282 }
26283
26284 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
26285 on ARM, since we know that shifts by negative amounts are no-ops.
26286 Additionally, the default expansion code is not available or suitable
26287 for post-reload insn splits (this can occur when the register allocator
26288 chooses not to do a shift in NEON).
26289
26290 This function is used in both initial expand and post-reload splits, and
26291 handles all kinds of 64-bit shifts.
26292
26293 Input requirements:
26294 - It is safe for the input and output to be the same register, but
26295 early-clobber rules apply for the shift amount and scratch registers.
26296 - Shift by register requires both scratch registers. Shift by a constant
26297 less than 32 in Thumb2 mode requires SCRATCH1 only. In all other cases
26298 the scratch registers may be NULL.
26299 - Ashiftrt by a register also clobbers the CC register. */
26300 void
26301 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
26302 rtx amount, rtx scratch1, rtx scratch2)
26303 {
26304 rtx out_high = gen_highpart (SImode, out);
26305 rtx out_low = gen_lowpart (SImode, out);
26306 rtx in_high = gen_highpart (SImode, in);
26307 rtx in_low = gen_lowpart (SImode, in);
26308
26309 /* Terminology:
26310 in = the register pair containing the input value.
26311 out = the destination register pair.
26312 up = the high- or low-part of each pair.
26313 down = the opposite part to "up".
26314 In a shift, we can consider bits to shift from "up"-stream to
26315 "down"-stream, so in a left-shift "up" is the low-part and "down"
26316 is the high-part of each register pair. */
26317
26318 rtx out_up = code == ASHIFT ? out_low : out_high;
26319 rtx out_down = code == ASHIFT ? out_high : out_low;
26320 rtx in_up = code == ASHIFT ? in_low : in_high;
26321 rtx in_down = code == ASHIFT ? in_high : in_low;
26322
26323 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
26324 gcc_assert (out
26325 && (REG_P (out) || GET_CODE (out) == SUBREG)
26326 && GET_MODE (out) == DImode);
26327 gcc_assert (in
26328 && (REG_P (in) || GET_CODE (in) == SUBREG)
26329 && GET_MODE (in) == DImode);
26330 gcc_assert (amount
26331 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
26332 && GET_MODE (amount) == SImode)
26333 || CONST_INT_P (amount)));
26334 gcc_assert (scratch1 == NULL
26335 || (GET_CODE (scratch1) == SCRATCH)
26336 || (GET_MODE (scratch1) == SImode
26337 && REG_P (scratch1)));
26338 gcc_assert (scratch2 == NULL
26339 || (GET_CODE (scratch2) == SCRATCH)
26340 || (GET_MODE (scratch2) == SImode
26341 && REG_P (scratch2)));
26342 gcc_assert (!REG_P (out) || !REG_P (amount)
26343 || !HARD_REGISTER_P (out)
26344 || (REGNO (out) != REGNO (amount)
26345 && REGNO (out) + 1 != REGNO (amount)));
26346
26347 /* Macros to make following code more readable. */
26348 #define SUB_32(DEST,SRC) \
26349 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
26350 #define RSB_32(DEST,SRC) \
26351 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
26352 #define SUB_S_32(DEST,SRC) \
26353 gen_addsi3_compare0 ((DEST), (SRC), \
26354 GEN_INT (-32))
26355 #define SET(DEST,SRC) \
26356 gen_rtx_SET (SImode, (DEST), (SRC))
26357 #define SHIFT(CODE,SRC,AMOUNT) \
26358 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
26359 #define LSHIFT(CODE,SRC,AMOUNT) \
26360 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
26361 SImode, (SRC), (AMOUNT))
26362 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
26363 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
26364 SImode, (SRC), (AMOUNT))
26365 #define ORR(A,B) \
26366 gen_rtx_IOR (SImode, (A), (B))
26367 #define BRANCH(COND,LABEL) \
26368 gen_arm_cond_branch ((LABEL), \
26369 gen_rtx_ ## COND (CCmode, cc_reg, \
26370 const0_rtx), \
26371 cc_reg)
26372
26373 /* Shifts by register and shifts by constant are handled separately. */
26374 if (CONST_INT_P (amount))
26375 {
26376 /* We have a shift-by-constant. */
26377
26378 /* First, handle out-of-range shift amounts.
26379 In both cases we try to match the result that an ARM instruction in a
26380 shift-by-register would give. This helps reduce execution
26381 differences between optimization levels, but it won't stop other
26382 parts of the compiler doing different things. This is "undefined
26383 behaviour", in any case. */
26384 if (INTVAL (amount) <= 0)
26385 emit_insn (gen_movdi (out, in));
26386 else if (INTVAL (amount) >= 64)
26387 {
26388 if (code == ASHIFTRT)
26389 {
26390 rtx const31_rtx = GEN_INT (31);
26391 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
26392 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
26393 }
26394 else
26395 emit_insn (gen_movdi (out, const0_rtx));
26396 }
26397
26398 /* Now handle valid shifts. */
26399 else if (INTVAL (amount) < 32)
26400 {
26401 /* Shifts by a constant less than 32. */
26402 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
26403
26404 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
26405 emit_insn (SET (out_down,
26406 ORR (REV_LSHIFT (code, in_up, reverse_amount),
26407 out_down)));
26408 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
26409 }
26410 else
26411 {
26412 /* Shifts by a constant greater than 31. */
26413 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
26414
26415 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
26416 if (code == ASHIFTRT)
26417 emit_insn (gen_ashrsi3 (out_up, in_up,
26418 GEN_INT (31)));
26419 else
26420 emit_insn (SET (out_up, const0_rtx));
26421 }
26422 }
26423 else
26424 {
26425 /* We have a shift-by-register. */
26426 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
26427
26428 /* This alternative requires the scratch registers. */
26429 gcc_assert (scratch1 && REG_P (scratch1));
26430 gcc_assert (scratch2 && REG_P (scratch2));
26431
26432 /* We will need the values "amount-32" and "32-amount" later.
26433 Swapping them around now allows the later code to be more general. */
26434 switch (code)
26435 {
26436 case ASHIFT:
26437 emit_insn (SUB_32 (scratch1, amount));
26438 emit_insn (RSB_32 (scratch2, amount));
26439 break;
26440 case ASHIFTRT:
26441 emit_insn (RSB_32 (scratch1, amount));
26442 /* Also set CC = amount > 32. */
26443 emit_insn (SUB_S_32 (scratch2, amount));
26444 break;
26445 case LSHIFTRT:
26446 emit_insn (RSB_32 (scratch1, amount));
26447 emit_insn (SUB_32 (scratch2, amount));
26448 break;
26449 default:
26450 gcc_unreachable ();
26451 }
26452
26453 /* Emit code like this:
26454
26455 arithmetic-left:
26456 out_down = in_down << amount;
26457 out_down = (in_up << (amount - 32)) | out_down;
26458 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
26459 out_up = in_up << amount;
26460
26461 arithmetic-right:
26462 out_down = in_down >> amount;
26463 out_down = (in_up << (32 - amount)) | out_down;
26464 if (amount < 32)
26465 out_down = ((signed)in_up >> (amount - 32)) | out_down;
26466 out_up = in_up << amount;
26467
26468 logical-right:
26469 out_down = in_down >> amount;
26470 out_down = (in_up << (32 - amount)) | out_down;
26471 if (amount < 32)
26472 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
26473 out_up = in_up << amount;
26474
26475 The ARM and Thumb2 variants are the same but implemented slightly
26476 differently. If this were only called during expand we could just
26477 use the Thumb2 case and let combine do the right thing, but this
26478 can also be called from post-reload splitters. */
26479
26480 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
26481
26482 if (!TARGET_THUMB2)
26483 {
26484 /* Emit code for ARM mode. */
26485 emit_insn (SET (out_down,
26486 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
26487 if (code == ASHIFTRT)
26488 {
26489 rtx done_label = gen_label_rtx ();
26490 emit_jump_insn (BRANCH (LT, done_label));
26491 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
26492 out_down)));
26493 emit_label (done_label);
26494 }
26495 else
26496 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
26497 out_down)));
26498 }
26499 else
26500 {
26501 /* Emit code for Thumb2 mode.
26502 Thumb2 can't do shift and or in one insn. */
26503 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
26504 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
26505
26506 if (code == ASHIFTRT)
26507 {
26508 rtx done_label = gen_label_rtx ();
26509 emit_jump_insn (BRANCH (LT, done_label));
26510 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
26511 emit_insn (SET (out_down, ORR (out_down, scratch2)));
26512 emit_label (done_label);
26513 }
26514 else
26515 {
26516 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
26517 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
26518 }
26519 }
26520
26521 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
26522 }
26523
26524 #undef SUB_32
26525 #undef RSB_32
26526 #undef SUB_S_32
26527 #undef SET
26528 #undef SHIFT
26529 #undef LSHIFT
26530 #undef REV_LSHIFT
26531 #undef ORR
26532 #undef BRANCH
26533 }
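/* Editorial sketch, not part of GCC: what the shift-by-constant sequences
   emitted above compute, in plain C on the {low, high} register pair.
   The names are hypothetical.  For a left shift by a constant 0 < n < 32:

     out_high = (in_high << n) | ((unsigned int) in_low >> (32 - n));
     out_low  = in_low << n;

   and for 32 <= n < 64:

     out_high = in_low << (n - 32);
     out_low  = 0;

   The right-shift cases are the mirror image, with the arithmetic variant
   filling both words with the sign bit (in_high >> 31) once the amount
   reaches 64, to match what a shift-by-register would produce.  */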
26534
26535
26536 /* Returns true if COMPARISON is a valid comparison operation, and
26537 puts the operands into a form that is valid for it. */
26538 bool
26539 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
26540 {
26541 enum rtx_code code = GET_CODE (*comparison);
26542 enum rtx_code canonical_code;
26543 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
26544 ? GET_MODE (*op2) : GET_MODE (*op1);
26545
26546 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
26547
26548 if (code == UNEQ || code == LTGT)
26549 return false;
26550
26551 canonical_code = arm_canonicalize_comparison (code, op1, op2);
26552 PUT_CODE (*comparison, canonical_code);
26553
26554 switch (mode)
26555 {
26556 case SImode:
26557 if (!arm_add_operand (*op1, mode))
26558 *op1 = force_reg (mode, *op1);
26559 if (!arm_add_operand (*op2, mode))
26560 *op2 = force_reg (mode, *op2);
26561 return true;
26562
26563 case DImode:
26564 if (!cmpdi_operand (*op1, mode))
26565 *op1 = force_reg (mode, *op1);
26566 if (!cmpdi_operand (*op2, mode))
26567 *op2 = force_reg (mode, *op2);
26568 return true;
26569
26570 case SFmode:
26571 case DFmode:
26572 if (!arm_float_compare_operand (*op1, mode))
26573 *op1 = force_reg (mode, *op1);
26574 if (!arm_float_compare_operand (*op2, mode))
26575 *op2 = force_reg (mode, *op2);
26576 return true;
26577 default:
26578 break;
26579 }
26580
26581 return false;
26582
26583 }
26584
26585 #include "gt-arm.h"